annotate bowtie_wrapper.py @ 6:ecbbc8be6266 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit a326a3469adfd33babe792cf58544438582a56a1
author devteam
date Sun, 08 Jan 2017 08:01:08 -0500
parents 306077e393d4
children 2c811e06006a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
1 #!/usr/bin/env python
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
2
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
3 """
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
4 Runs Bowtie on single-end or paired-end data.
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
5
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
6 usage: bowtie_wrapper.py [options]
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
7 -t, --threads=t: The number of threads to run
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
8 -o, --output=o: The output file
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
9 --output_unmapped_reads=: File name for unmapped reads (single-end)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
10 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
11 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
12 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
13 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
14 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
5
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
15 --output_mapping_stats=: File name for mapping statistics (output on stderr by bowtie)
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
19 -2, --paired=2: Whether the data is single- or paired-end
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
20 -g, --genomeSource=g: The type of reference provided
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
21 -r, --ref=r: The reference genome to use or index
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
22 -s, --skip=s: Skip the first n reads
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
23 -a, --alignLimit=a: Only align the first n reads
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
28 -l, --seedLen=l: Seed length
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
44 -F, --offrate=F: Override the offrate of the index to n
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
45 -8, --snpphred=8: SNP penalty on Phred scale
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
48 -S, --seed=S: Seed for pseudo-random number generator
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
49 -C, --params=C: Whether to use default or specified parameters
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
50 -u, --iautoB=u: Automatic or specified behavior
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
54 -D, --idcv=D: The period for the difference-cover sample
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
62 -x, --indexSettings=x: Whether or not indexing options are to be set
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
63 -H, --suppressHeader=H: Suppress header
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
65 """
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
66
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
67 import optparse, os, shutil, subprocess, sys, tempfile
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
68
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
69 # Support FASTQ quality encodings other than Sanger: map each Galaxy input format name to the matching bowtie quality-encoding argument
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
70 DEFAULT_ASCII_ENCODING = '--phred33-quals'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
73
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error on stderr and terminate the script.

    Writes ``msg`` followed by a newline to ``sys.stderr`` and then raises
    ``SystemExit`` with status 1, so the failure is visible through the
    process exit code as well as through the stderr text.

    msg: the error text to emit (interpolated with ``%s``).
    """
    sys.stderr.write( '%s\n' % msg )
    # A bare sys.exit() exits with status 0 (success); this is the error
    # path, so exit with an explicit non-zero status instead.
    sys.exit( 1 )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
77
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
78 def __main__():
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
79 #Parse Command Line
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
80 parser = optparse.OptionParser()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
82 parser.add_option( '-o', '--output', dest='output', help='The output file' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
5
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
89 parser.add_option( '', '--output_mapping_stats', dest='output_mapping_stats', help='File for mapping statistics (i.e. stderr from bowtie)' )
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
90 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
91 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
92 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
93 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
94 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
95 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
96 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
97 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
98 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
99 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
100 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
101 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
102 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
103 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
104 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
105 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
106 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
107 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
108 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
109 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
110 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
111 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
112 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
113 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
114 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
115 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
116 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
117 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
118 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
119 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
120 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
121 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
122 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
123 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
124 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
125 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
126 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
127 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
128 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
129 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
130 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
131 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
132 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
133 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
134 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
135 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
136 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
137 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
138 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
139 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
140 (options, args) = parser.parse_args()
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
141 if options.mismatchSeed and options.maxMismatches:
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
142 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
143 stdout = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
144
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
145 # make temp directory for placement of indices and copy reference file there if necessary
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
146 tmp_index_dir = tempfile.mkdtemp()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
147 # get type of data (solid or solexa)
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
148 if options.dataType == 'solid':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
149 colorspace = '-C'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
150 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
151 colorspace = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
152 # index if necessary
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
153 if options.genomeSource == 'history' and not options.do_not_build_index:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
154 # set up commands
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
155 if options.index_settings =='indexPreSet':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
156 indexing_cmds = '%s' % colorspace
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
157 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
158 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
159 if options.iautoB and options.iautoB == 'set':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
160 iautoB = '--noauto'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
161 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
162 iautoB = ''
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
163 if options.ipacked and options.ipacked == 'packed':
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
164 ipacked = '--packed'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
165 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
166 ipacked = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
167 if options.ibmax and int( options.ibmax ) >= 1:
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
168 ibmax = '--bmax %s' % options.ibmax
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
169 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
170 ibmax = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
171 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
172 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
173 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
174 ibmaxdivn = ''
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
175 if options.idcv and int( options.idcv ) >= 3:
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
176 idcv = '--dcv %s' % options.idcv
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
177 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
178 idcv = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
179 if options.inodc and options.inodc == 'nodc':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
180 inodc = '--nodc'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
181 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
182 inodc = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
183 if options.inoref and options.inoref == 'noref':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
184 inoref = '--noref'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
185 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
186 inoref = ''
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
187 if options.iftab and int( options.iftab ) >= 1:
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
188 iftab = '--ftabchars %s' % options.iftab
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
189 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
190 iftab = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
191 if options.intoa and options.intoa == 'yes':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
192 intoa = '--ntoa'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
193 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
194 intoa = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
195 if options.iendian and options.iendian == 'big':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
196 iendian = '--big'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
197 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
198 iendian = '--little'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
199 if options.iseed and int( options.iseed ) > 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
200 iseed = '--seed %s' % options.iseed
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
201 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
202 iseed = ''
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
203 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
204 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
205 inoref, options.ioffrate, iftab, intoa, iendian,
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
206 iseed, colorspace )
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
207 except ValueError, e:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
208 # clean up temp dir
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
209 if os.path.exists( tmp_index_dir ):
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
210 shutil.rmtree( tmp_index_dir )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
211 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
212 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
213 ref_file_name = ref_file.name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
214 ref_file.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
215 os.symlink( options.ref, ref_file_name )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
216 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
217 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
218 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
219 tmp_stderr = open( tmp, 'wb' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
220 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
221 returncode = proc.wait()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
222 tmp_stderr.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
223 # get stderr, allowing for case where it's very large
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
224 tmp_stderr = open( tmp, 'rb' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
225 stderr = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
226 buffsize = 1048576
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
227 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
228 while True:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
229 stderr += tmp_stderr.read( buffsize )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
230 if not stderr or len( stderr ) % buffsize != 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
231 break
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
232 except OverflowError:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
233 pass
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
234 tmp_stderr.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
235 if returncode != 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
236 raise Exception, stderr
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
237 except Exception, e:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
238 # clean up temp dir
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
239 if os.path.exists( tmp_index_dir ):
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
240 shutil.rmtree( tmp_index_dir )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
241 stop_err( 'Error indexing reference sequence\n' + str( e ) )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
242 stdout += 'File indexed. '
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
243 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
244 ref_file_name = options.ref
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
245 # set up aligning and generate aligning command options
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
246 # automatically set threads in both cases
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
247 tmp_suppressed_file_name = None
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
248 tmp_unmapped_file_name = None
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
249 if options.suppressHeader == 'true':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
250 suppressHeader = '--sam-nohead'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
251 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
252 suppressHeader = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
253 if options.maxInsert and int( options.maxInsert ) > 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
254 maxInsert = '-X %s' % options.maxInsert
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
255 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
256 maxInsert = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
257 if options.mateOrient:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
258 mateOrient = '--%s' % options.mateOrient
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
259 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
260 mateOrient = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
261 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
262 if options.params == 'preSet':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
263 aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
264 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
265 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
266 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
267 if options.skip and int( options.skip ) > 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
268 skip = '-s %s' % options.skip
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
269 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
270 skip = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
271 if options.alignLimit and int( options.alignLimit ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
272 alignLimit = '-u %s' % options.alignLimit
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
273 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
274 alignLimit = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
275 if options.trimH and int( options.trimH ) > 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
276 trimH = '-5 %s' % options.trimH
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
277 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
278 trimH = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
279 if options.trimL and int( options.trimL ) > 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
280 trimL = '-3 %s' % options.trimL
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
281 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
282 trimL = ''
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
283 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
284 or options.maxMismatches == '2' or options.maxMismatches == '3'):
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
285 maxMismatches = '-v %s' % options.maxMismatches
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
286 else:
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
287 maxMismatches = ''
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
288 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
289 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
290 mismatchSeed = '-n %s' % options.mismatchSeed
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
291 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
292 mismatchSeed = ''
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
293 if options.mismatchQual and int( options.mismatchQual ) >= 1:
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
294 mismatchQual = '-e %s' % options.mismatchQual
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
295 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
296 mismatchQual = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
297 if options.seedLen and int( options.seedLen ) >= 5:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
298 seedLen = '-l %s' % options.seedLen
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
299 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
300 seedLen = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
301 if options.rounding == 'noRound':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
302 rounding = '--nomaqround'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
303 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
304 rounding = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
305 if options.minInsert and int( options.minInsert ) > 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
306 minInsert = '-I %s' % options.minInsert
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
307 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
308 minInsert = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
309 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
310 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
311 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
312 maxAlignAttempt = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
313 if options.forwardAlign == 'noForward':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
314 forwardAlign = '--nofw'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
315 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
316 forwardAlign = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
317 if options.reverseAlign == 'noReverse':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
318 reverseAlign = '--norc'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
319 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
320 reverseAlign = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
321 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
322 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
323 maxBacktracks = '--maxbts %s' % options.maxBacktracks
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
324 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
325 maxBacktracks = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
326 if options.tryHard == 'doTryHard':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
327 tryHard = '-y'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
328 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
329 tryHard = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
330 if options.valAlign and int( options.valAlign ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
331 valAlign = '-k %s' % options.valAlign
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
332 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
333 valAlign = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
334 if options.allValAligns == 'doAllValAligns':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
335 allValAligns = '-a'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
336 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
337 allValAligns = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
338 if options.suppressAlign and int( options.suppressAlign ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
339 suppressAlign = '-m %s' % options.suppressAlign
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
340 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
341 suppressAlign = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
342 if options.best == 'doBest':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
343 best = '--best'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
344 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
345 best = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
346 if options.strata == 'doStrata':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
347 strata = '--strata'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
348 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
349 strata = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
350 if options.offrate and int( options.offrate ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
351 offrate = '-o %s' % options.offrate
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
352 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
353 offrate = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
354 if options.seed and int( options.seed ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
355 seed = '--seed %s' % options.seed
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
356 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
357 seed = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
358 if options.paired == 'paired':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
359 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
360 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
361 tmp_unmapped_file_name = tmp_unmapped_file.name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
362 tmp_unmapped_file.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
363 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
364 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
365 output_unmapped_reads = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
366 if options.output_suppressed_reads_l and options.output_suppressed_reads_r:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
367 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
368 tmp_suppressed_file_name = tmp_suppressed_file.name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
369 tmp_suppressed_file.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
370 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
371 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
372 output_suppressed_reads = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
373 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
374 if options.output_unmapped_reads:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
375 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
376 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
377 output_unmapped_reads = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
378 if options.output_suppressed_reads:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
379 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
380 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
381 output_suppressed_reads = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
382 snpfrac = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
383 if options.snpphred and int( options.snpphred ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
384 snpphred = '--snpphred %s' % options.snpphred
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
385 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
386 snpphred = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
387 if options.snpfrac and float( options.snpfrac ) >= 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
388 snpfrac = '--snpfrac %s' % options.snpfrac
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
389 if options.keepends and options.keepends == 'doKeepends':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
390 keepends = '--col-keepends'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
391 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
392 keepends = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
393 aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
394 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
395 ( maxInsert, mateOrient, options.threads, suppressHeader,
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
396 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
397 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
398 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
399 tryHard, valAlign, allValAligns, suppressAlign, best,
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
400 strata, offrate, seed, snpphred, snpfrac, keepends,
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
401 output_unmapped_reads, output_suppressed_reads,
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
402 quality_score_encoding )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
403 except ValueError, e:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
404 # clean up temp dir
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
405 if os.path.exists( tmp_index_dir ):
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
406 shutil.rmtree( tmp_index_dir )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
407 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
408 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
409 # have to nest try-except in try-finally to handle 2.4
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
410 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
411 # prepare actual mapping commands
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
412 if options.paired == 'paired':
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
413 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
414 else:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
415 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
416 # align
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
417 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
418 tmp_stderr = open( tmp, 'wb' )
5
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
419 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stdout=sys.stdout, stderr=tmp_stderr.fileno() )
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
420 returncode = proc.wait()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
421 tmp_stderr.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
422 # get stderr, allowing for case where it's very large
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
423 tmp_stderr = open( tmp, 'rb' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
424 stderr = ''
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
425 buffsize = 1048576
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
426 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
427 while True:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
428 stderr += tmp_stderr.read( buffsize )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
429 if not stderr or len( stderr ) % buffsize != 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
430 break
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
431 except OverflowError:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
432 pass
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
433 tmp_stderr.close()
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
434 if returncode != 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
435 raise Exception, stderr
5
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
436 elif options.output_mapping_stats is not None:
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
437 # Write stderr (containing the mapping statistics) to a named file
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
438 with open(options.output_mapping_stats, 'w') as mapping_stats:
306077e393d4 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/bowtie_wrappers commit b2e1043bf4db38be490fec298a1829f8e4a1c48e
devteam
parents: 4
diff changeset
439 mapping_stats.write( stderr )
0
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
440 # get suppressed and unmapped reads output files in place if appropriate
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
441 if options.paired == 'paired' and tmp_suppressed_file_name and \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
442 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
443 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
444 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
445 right = tmp_suppressed_file_name.replace( '.fastq', '_2.fastq' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
446 shutil.move( left, options.output_suppressed_reads_l )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
447 shutil.move( right, options.output_suppressed_reads_r )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
448 except Exception, e:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
449 sys.stdout.write( 'Error producing the suppressed output file.\n' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
450 if options.paired == 'paired' and tmp_unmapped_file_name and \
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
451 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
452 try:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
453 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
454 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
455 shutil.move( left, options.output_unmapped_reads_l )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
456 shutil.move( right, options.output_unmapped_reads_r )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
457 except Exception, e:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
458 sys.stdout.write( 'Error producing the unmapped output file.\n' )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
459 # check that there are results in the output file
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
460 if os.path.getsize( options.output ) == 0:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
461 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
462 except Exception, e:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
463 stop_err( 'Error aligning sequence. ' + str( e ) )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
464 finally:
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
465 # clean up temp dir
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
466 if os.path.exists( tmp_index_dir ):
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
467 shutil.rmtree( tmp_index_dir )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
468 stdout += 'Sequence file aligned.\n'
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
469 sys.stdout.write( stdout )
0c7e4eadfb3c Uploaded tarball
devteam
parents:
diff changeset
470
4
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
# Script entry point: call __main__() only when this file is executed directly, not when it is imported as a module.
471 if __name__ == "__main__":
df86f29bedee planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty
devteam
parents: 0
diff changeset
472 __main__()