annotate bowtie_rRNA_removal_wrapper/bowtie_rRNA_tRNA_removal_wrapper.py @ 21:b49ad9f12dfa draft

Uploaded
author jackcurragh
date Wed, 11 May 2022 13:37:35 +0000
parents dbb3e98144fd
children ca7c965f7cc9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
1 #!/usr/bin/env python
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
2
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
3 """
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
4 Runs Bowtie on single-end or paired-end data.
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
5
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
6 usage: bowtie_wrapper.py [options]
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
7 -t, --threads=t: The number of threads to run
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
8 -o, --output=o: The output file
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
9 --output_unmapped_reads=: File name for unmapped reads (single-end)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
10 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
11 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
12 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
13 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
14 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
15 --output_mapping_stats=: File name for mapping statistics (output on stderr by bowtie)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
19 -2, --paired=2: Whether the data is single- or paired-end
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
20 -g, --genomeSource=g: The type of reference provided
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
21 -r, --ref=r: The reference genome to use or index
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
22 -s, --skip=s: Skip the first n reads
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
23 -a, --alignLimit=a: Only align the first n reads
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
28 -l, --seedLen=l: Seed length
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
44 -F, --offrate=F: Override the offrate of the index to n
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
45 -8, --snpphred=8: SNP penalty on Phred scale
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
48 -S, --seed=S: Seed for pseudo-random number generator
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
49 -C, --params=C: Whether to use default or specified parameters
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
50 -u, --iautoB=u: Automatic or specified behavior
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
54 -D, --idcv=D: The period for the difference-cover sample
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
62 -x, --indexSettings=x: Whether or not indexing options are to be set
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
63 -H, --suppressHeader=H: Suppress header
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
65 """
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
66
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
67 import optparse
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
68 import os
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
69 import shutil
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
70 import subprocess
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
71 import sys
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
72 import tempfile
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
73
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
74 # Allow more than Sanger encoded variants
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
75 DEFAULT_ASCII_ENCODING = '--phred33-quals'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
76 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {'fastqsanger': '--phred33-quals', 'fastqillumina': '--phred64-quals', 'fastqsolexa': '--solexa-quals'}
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
77 # FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
78
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
79
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
80 def stop_err( msg ):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
81 sys.exit('%s\n' % msg)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
82
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
83
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
84 def __main__():
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
85 parser = optparse.OptionParser()
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
86 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
87 parser.add_option( '-o', '--output', dest='output', help='The output file' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
88 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
89 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
90 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
91 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
92 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
93 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
94 parser.add_option( '', '--output_mapping_stats', dest='output_mapping_stats', help='File for mapping statistics (i.e. stderr from bowtie)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
95 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
96 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
97 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
98 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
99 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
100 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
101 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
102 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
103 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
104 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
105 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
106 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
107 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
108 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
109 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
110 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
111 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
112 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
113 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
114 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
115 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
116 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
117 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
118 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
119 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
120 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
121 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
122 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
123 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
124 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
125 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
126 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
127 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
128 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
129 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
130 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
131 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
132 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
133 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
134 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
135 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
136 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
137 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
138 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
139 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
140 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
141 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
142 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
143 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
144 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
145 (options, args) = parser.parse_args()
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
146 if options.mismatchSeed and options.maxMismatches:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
147 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
148 stdout = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
149
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
150 # make temp directory for placement of indices and copy reference file there if necessary
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
151 tmp_index_dir = tempfile.mkdtemp()
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
152 # get type of data (solid or solexa)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
153 if options.dataType == 'solid':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
154 colorspace = '-C'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
155 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
156 colorspace = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
157 # index if necessary
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
158 if options.genomeSource == 'history' and not options.do_not_build_index:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
159 # set up commands
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
160 if options.index_settings == 'indexPreSet':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
161 indexing_cmds = '%s' % colorspace
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
162 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
163 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
164 if options.iautoB and options.iautoB == 'set':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
165 iautoB = '--noauto'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
166 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
167 iautoB = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
168 if options.ipacked and options.ipacked == 'packed':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
169 ipacked = '--packed'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
170 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
171 ipacked = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
172 if options.ibmax and int( options.ibmax ) >= 1:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
173 ibmax = '--bmax %s' % options.ibmax
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
174 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
175 ibmax = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
176 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
177 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
178 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
179 ibmaxdivn = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
180 if options.idcv and int( options.idcv ) >= 3:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
181 idcv = '--dcv %s' % options.idcv
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
182 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
183 idcv = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
184 if options.inodc and options.inodc == 'nodc':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
185 inodc = '--nodc'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
186 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
187 inodc = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
188 if options.inoref and options.inoref == 'noref':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
189 inoref = '--noref'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
190 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
191 inoref = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
192 if options.iftab and int( options.iftab ) >= 1:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
193 iftab = '--ftabchars %s' % options.iftab
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
194 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
195 iftab = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
196 if options.intoa and options.intoa == 'yes':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
197 intoa = '--ntoa'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
198 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
199 intoa = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
200 if options.iendian and options.iendian == 'big':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
201 iendian = '--big'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
202 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
203 iendian = '--little'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
204 if options.iseed and int( options.iseed ) > 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
205 iseed = '--seed %s' % options.iseed
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
206 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
207 iseed = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
208 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
209 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
210 inoref, options.ioffrate, iftab, intoa, iendian,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
211 iseed, colorspace )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
212 except ValueError as e:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
213 # clean up temp dir
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
214 if os.path.exists( tmp_index_dir ):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
215 shutil.rmtree( tmp_index_dir )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
216 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
217 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
218 ref_file_name = ref_file.name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
219 ref_file.close()
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
220 os.symlink( options.ref, ref_file_name )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
221 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
222 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
223 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
224 with open(tmp, 'w') as tmp_stderr:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
225 returncode = subprocess.call(args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
226 if returncode != 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
227 # get stderr, allowing for case where it's very large
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
228 stderr = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
229 buffsize = 1048576
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
230 with open(tmp, 'r') as tmp_stderr:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
231 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
232 while True:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
233 stderr += tmp_stderr.read(buffsize)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
234 if not stderr or len(stderr) % buffsize != 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
235 break
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
236 except OverflowError:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
237 pass
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
238 raise Exception(stderr)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
239 except Exception as e:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
240 # clean up temp dir
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
241 if os.path.exists( tmp_index_dir ):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
242 shutil.rmtree( tmp_index_dir )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
243 stop_err( 'Error indexing reference sequence\n' + str( e ) )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
244 stdout += 'File indexed. '
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
245 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
246 ref_file_name = options.ref
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
247 # set up aligning and generate aligning command options
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
248 # automatically set threads in both cases
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
249 tmp_suppressed_file_name = None
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
250 tmp_unmapped_file_name = None
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
251 if options.suppressHeader == 'true':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
252 suppressHeader = '--sam-nohead'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
253 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
254 suppressHeader = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
255 if options.maxInsert and int( options.maxInsert ) > 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
256 maxInsert = '-X %s' % options.maxInsert
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
257 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
258 maxInsert = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
259 if options.mateOrient:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
260 mateOrient = '--%s' % options.mateOrient
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
261 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
262 mateOrient = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
263 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
264 if options.params == 'preSet':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
265 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
266 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
267 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
268 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
269 if options.skip and int( options.skip ) > 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
270 skip = '-s %s' % options.skip
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
271 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
272 skip = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
273 if options.alignLimit and int( options.alignLimit ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
274 alignLimit = '-u %s' % options.alignLimit
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
275 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
276 alignLimit = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
277 if options.trimH and int( options.trimH ) > 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
278 trimH = '-5 %s' % options.trimH
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
279 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
280 trimH = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
281 if options.trimL and int( options.trimL ) > 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
282 trimL = '-3 %s' % options.trimL
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
283 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
284 trimL = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
285 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' or
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
286 options.maxMismatches == '2' or options.maxMismatches == '3'):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
287 maxMismatches = '-v %s' % options.maxMismatches
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
288 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
289 maxMismatches = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
290 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' or
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
291 options.mismatchSeed == '2' or options.mismatchSeed == '3'):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
292 mismatchSeed = '-n %s' % options.mismatchSeed
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
293 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
294 mismatchSeed = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
295 if options.mismatchQual and int( options.mismatchQual ) >= 1:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
296 mismatchQual = '-e %s' % options.mismatchQual
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
297 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
298 mismatchQual = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
299 if options.seedLen and int( options.seedLen ) >= 5:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
300 seedLen = '-l %s' % options.seedLen
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
301 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
302 seedLen = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
303 if options.rounding == 'noRound':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
304 rounding = '--nomaqround'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
305 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
306 rounding = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
307 if options.minInsert and int( options.minInsert ) > 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
308 minInsert = '-I %s' % options.minInsert
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
309 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
310 minInsert = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
311 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
312 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
313 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
314 maxAlignAttempt = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
315 if options.forwardAlign == 'noForward':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
316 forwardAlign = '--nofw'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
317 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
318 forwardAlign = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
319 if options.reverseAlign == 'noReverse':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
320 reverseAlign = '--norc'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
321 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
322 reverseAlign = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
323 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
324 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
325 maxBacktracks = '--maxbts %s' % options.maxBacktracks
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
326 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
327 maxBacktracks = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
328 if options.tryHard == 'doTryHard':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
329 tryHard = '-y'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
330 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
331 tryHard = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
332 if options.valAlign and int( options.valAlign ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
333 valAlign = '-k %s' % options.valAlign
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
334 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
335 valAlign = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
336 if options.allValAligns == 'doAllValAligns':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
337 allValAligns = '-a'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
338 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
339 allValAligns = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
340 if options.suppressAlign and int( options.suppressAlign ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
341 suppressAlign = '-m %s' % options.suppressAlign
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
342 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
343 suppressAlign = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
344 if options.best == 'doBest':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
345 best = '--best'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
346 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
347 best = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
348 if options.strata == 'doStrata':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
349 strata = '--strata'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
350 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
351 strata = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
352 if options.offrate and int( options.offrate ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
353 offrate = '-o %s' % options.offrate
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
354 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
355 offrate = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
356 if options.seed and int( options.seed ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
357 seed = '--seed %s' % options.seed
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
358 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
359 seed = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
360 if options.paired == 'paired':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
361 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
362 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
363 tmp_unmapped_file_name = tmp_unmapped_file.name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
364 tmp_unmapped_file.close()
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
365 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
366 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
367 output_unmapped_reads = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
368 if options.output_suppressed_reads:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
369 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
370 tmp_suppressed_file_name = tmp_suppressed_file.name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
371 tmp_suppressed_file.close()
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
372 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
373 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
374 output_suppressed_reads = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
375 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
376 if options.output_unmapped_reads:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
377 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
378 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
379 output_unmapped_reads = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
380 if options.output_suppressed_reads:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
381 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
382 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
383 output_suppressed_reads = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
384 snpfrac = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
385 if options.snpphred and int( options.snpphred ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
386 snpphred = '--snpphred %s' % options.snpphred
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
387 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
388 snpphred = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
389 if options.snpfrac and float( options.snpfrac ) >= 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
390 snpfrac = '--snpfrac %s' % options.snpfrac
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
391 if options.keepends and options.keepends == 'doKeepends':
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
392 keepends = '--col-keepends'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
393 else:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
394 keepends = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
395 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
396 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
397 ( maxInsert, mateOrient, options.threads, suppressHeader,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
398 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
399 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
400 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
401 tryHard, valAlign, allValAligns, suppressAlign, best,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
402 strata, offrate, seed, snpphred, snpfrac, keepends,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
403 output_unmapped_reads, output_suppressed_reads,
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
404 quality_score_encoding )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
405 except ValueError as e:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
406 # clean up temp dir
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
407 if os.path.exists( tmp_index_dir ):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
408 shutil.rmtree( tmp_index_dir )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
409 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
410 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
411 # have to nest try-except in try-finally to handle 2.4
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
412 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
413 # prepare actual mapping commands
21
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
414 if options.output != 'XXXX':
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
415 print('incorrect option')
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
416 if options.paired == 'paired':
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
417 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s | samtools view -b -S > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output, options.output )
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
418 else:
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
419 cmd2 = 'bowtie %s %s %s > %s | samtools view -b -S > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output, options.output )
12
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
420 else:
21
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
421 if options.paired == 'paired':
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
422 cmd2 = 'bowtie %s %s -1 %s -2 %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2 )
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
423 else:
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
424 print('correct option')
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
425 cmd2 = 'bowtie %s %s %s' % ( aligning_cmds, ref_file_name, options.input1 )
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
426 print(cmd2)
12
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
427 # align
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
428 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
429 with open(tmp, 'w') as tmp_stderr:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
430 returncode = subprocess.call(args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno())
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
431 # get stderr, allowing for case where it's very large
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
432 stderr = ''
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
433 buffsize = 1048576
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
434 with open(tmp, 'r') as tmp_stderr:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
435 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
436 while True:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
437 stderr += tmp_stderr.read(buffsize)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
438 if not stderr or len(stderr) % buffsize != 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
439 break
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
440 except OverflowError:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
441 pass
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
442 if returncode != 0:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
443 raise Exception(stderr)
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
444 elif options.output_mapping_stats is not None:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
445 # Write stderr (containing the mapping statistics) to a named file
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
446 with open(options.output_mapping_stats, 'w') as mapping_stats:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
447 mapping_stats.write( stderr )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
448 # get suppressed and unmapped reads output files in place if appropriate
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
449 if options.paired == 'paired' and tmp_suppressed_file_name and \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
450 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
451 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
452 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
453 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
454 shutil.move( left, options.output_suppressed_reads_l )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
455 shutil.move( right, options.output_suppressed_reads_r )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
456 except Exception as e:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
457 sys.stdout.write( 'Error producing the suppressed output file.\n' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
458 if options.paired == 'paired' and tmp_unmapped_file_name and \
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
459 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
460 try:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
461 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
462 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
463 shutil.move( left, options.output_unmapped_reads_l )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
464 shutil.move( right, options.output_unmapped_reads_r )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
465 except Exception as e:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
466 sys.stdout.write( 'Error producing the unmapped output file.\n' )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
467 # check that there are results in the output file
21
b49ad9f12dfa Uploaded
jackcurragh
parents: 12
diff changeset
468 if options.output != "XXXX" and os.path.getsize( options.output ) == 0:
12
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
469 raise Exception('The output file is empty, there may be an error with your input file or settings.')
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
470 except Exception as e:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
471 stop_err( 'Error aligning sequence. ' + str( e ) )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
472 finally:
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
473 # clean up temp dir
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
474 if os.path.exists( tmp_index_dir ):
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
475 shutil.rmtree( tmp_index_dir )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
476 stdout += 'Sequence file aligned.\n'
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
477 sys.stdout.write( stdout )
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
478
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
479
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
480 if __name__ == "__main__":
dbb3e98144fd Uploaded
jackcurragh
parents:
diff changeset
481 __main__()