Previous changeset 6:daf94bdef589 (2016-08-07) Next changeset 8:0c983549ad8c (2016-08-07) |
Commit message:
Uploaded |
added:
enhanced_bowtie_wrapper 1.0.0.py |
b |
diff -r daf94bdef589 -r 95f632feca4c enhanced_bowtie_wrapper 1.0.0.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/enhanced_bowtie_wrapper 1.0.0.py Sun Aug 07 22:30:09 2016 -0400 |
[ |
b'@@ -0,0 +1,497 @@\n+#!/usr/bin/env python\n+\n+"""\n+Runs Bowtie on single-end or paired-end data.\n+For use with Bowtie v. 0.12.7\n+\n+usage: bowtie_wrapper.py [options]\n+ -t, --threads=t: The number of threads to run\n+ -o, --output=o: The output file\n+ --output_unmapped_reads=: File name for unmapped reads (single-end)\n+ --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)\n+ --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)\n+ --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)\n+ --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)\n+ --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)\n+ -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format\n+ -I, --input2=I: The reverse reads file in Sanger FASTQ format\n+ -4, --dataType=4: The type of data (SOLiD or Solexa)\n+ -2, --paired=2: Whether the data is single- or paired-end\n+ -g, --genomeSource=g: The type of reference provided\n+ -r, --ref=r: The reference genome to use or index\n+ -s, --skip=s: Skip the first n reads\n+ -a, --alignLimit=a: Only align the first n reads\n+ -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment\n+ -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment\n+ -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed\n+ -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions\n+ -l, --seedLen=l: Seed length\n+ -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30\n+ -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode\n+ -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist\n+ -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair\n+ -v, --valAlign=v: Report up to n valid alignments per read or pair\n+ -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist\n+ -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are \'best\' in terms of stratum and in terms of the quality values at the mismatched positions\n+ -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read\n+ -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable\n+ -j, --minInsert=j: Minimum insert size for valid paired-end alignments\n+ -J, --maxInsert=J: Maximum insert size for valid paired-end alignments\n+ -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand\n+ -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate\n+ -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand\n+ -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand\n+ -F, --offrate=F: Override the offrate of the index to n\n+ -8, --snpphred=8: SNP penalty on Phred scale\n+ -6, --snpfrac=6: Fraction of sites expected to be SNP sites\n+ -7, --keepends=7: Keep extreme-end nucleotides and qualities\n+ -S, --seed=S: Seed for pseudo-random number generator\n+ -C, --params=C: Whether to use default or specified parameters\n+ -u, --iautoB=u: Automatic or specified behavior\n+ -K, --ipacked=K: Whether or not to use a packed representation for DNA strings\n+ -Q, --ibmax=Q: Maximum number of suffixes allowed in a block\n+ -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference\n+ -D, --idcv=D: The period for the differ'..b' tryHard, valAlign, allValAligns, suppressAlign, best,\n+ strata, offrate, seed, snpphred, snpfrac, keepends,\n+ output_unmapped_reads, output_suppressed_reads,\n+ quality_score_encoding )\n+ \n+ \n+ \n+ \n+ \n+ \n+ except ValueError, e:\n+ # clean up temp dir\n+ if os.path.exists( tmp_index_dir ):\n+ shutil.rmtree( tmp_index_dir )\n+ stop_err( \'Something is wrong with the alignment parameters and the alignment could not be run\\n\' + str( e ) )\n+ try:\n+ # have to nest try-except in try-finally to handle 2.4\n+ try:\n+ # prepare actual mapping commands\n+ if options.paired == \'paired\':\n+ cmd2 = \'bowtie %s %s -1 %s -2 %s > %s\' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )\n+ else:\n+ cmd2 = \'bowtie %s %s %s > %s\' % ( aligning_cmds, ref_file_name, options.input1, options.output )\n+ # align\n+ tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name\n+ tmp_stderr = open( tmp, \'wb\' )\n+ proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )\n+ returncode = proc.wait()\n+ tmp_stderr.close()\n+ # get stderr, allowing for case where it\'s very large\n+ tmp_stderr = open( tmp, \'rb\' )\n+ stderr = \'\'\n+ buffsize = 1048576\n+ try:\n+ while True:\n+ stderr += tmp_stderr.read( buffsize )\n+ if not stderr or len( stderr ) % buffsize != 0:\n+ break\n+ except OverflowError:\n+ pass\n+ tmp_stderr.close()\n+ if returncode != 0:\n+ raise Exception, stderr\n+ # get suppressed and unmapped reads output files in place if appropriate\n+ if options.paired == \'paired\' and tmp_suppressed_file_name and \\\n+ options.output_suppressed_reads_l and options.output_suppressed_reads_r:\n+ try:\n+ left = tmp_suppressed_file_name.replace( \'.fastq\', \'_1.fastq\' )\n+ right = tmp_suppressed_file_name.replace( \'.fastq\', \'_1.fastq\' )\n+ shutil.move( left, options.output_suppressed_reads_l )\n+ shutil.move( right, options.output_suppressed_reads_r )\n+ except Exception, e:\n+ sys.stdout.write( \'Error producing the suppressed output file.\\n\' )\n+ if options.paired == \'paired\' and tmp_unmapped_file_name and \\\n+ options.output_unmapped_reads_l and options.output_unmapped_reads_r:\n+ try:\n+ left = tmp_unmapped_file_name.replace( \'.fastq\', \'_1.fastq\' )\n+ right = tmp_unmapped_file_name.replace( \'.fastq\', \'_2.fastq\' )\n+ shutil.move( left, options.output_unmapped_reads_l )\n+ shutil.move( right, options.output_unmapped_reads_r )\n+ except Exception, e:\n+ sys.stdout.write( \'Error producing the unmapped output file.\\n\' )\n+ # check that there are results in the output file\n+ if os.path.getsize( options.output ) == 0:\n+ raise Exception, \'The output file is empty, there may be an error with your input file or settings.\'\n+ except Exception, e:\n+ stop_err( \'Error aligning sequence. \' + str( e ) )\n+ finally:\n+ # clean up temp dir\n+ if os.path.exists( tmp_index_dir ):\n+ shutil.rmtree( tmp_index_dir )\n+ stdout += \'Sequence file aligned.\\n\'\n+ sys.stdout.write( stdout )\n+\n+if __name__ == "__main__":\n+ __main__()\n' |