annotate BSseeker2/bs_seeker2-build.py @ 1:8b26adf64adc draft default tip

V2.0.5
author weilong-guo
date Tue, 05 Nov 2013 01:55:39 -0500
parents e6df770c0e58
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
weilong-guo
parents: 0
diff changeset
1 #!/usr/bin/env python
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
2
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
3 import os
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
4 from optparse import OptionParser, OptionGroup
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
5 from bs_index.wg_build import *
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
6 from bs_index.rrbs_build import *
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
7 from bs_utils.utils import *
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
8
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
9
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse the options, validate the reference
    # genome / aligner / output directory, then hand over to rrbs_build()
    # (when -r is given) or wg_build() to build the index.
    #
    # NOTE(review): error() comes from a star import and is assumed to
    # terminate the script; the validation below relies on that -- confirm.

    # The visible imports never import sys by name, yet sys.argv is used below.
    import sys

    parser = OptionParser()

    parser.add_option("-f", "--file", dest="filename", help="Input your reference genome file (fasta)", metavar="FILE")
    parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE)
    # The runs of 70 spaces pad the detected paths in the generated help text.
    parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Detected: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)),
                      metavar="PATH")
    parser.add_option("-d", "--db", type="string", dest="dbpath", help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]", metavar="DBPATH", default = reference_genome_path)

    parser.add_option("-v", "--version", action="store_true", dest="version", help="show version of BS-Seeker2", default=False)

    # RRBS options
    rrbs_opts = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options",
                            "Use these options in conjunction with -r [--rrbs]")
    rrbs_opts.add_option("-r", "--rrbs", action="store_true", dest="rrbs", help = 'Build index specially for Reduced Representation Bisulfite Sequencing experiments. Genome other than certain fragments will be masked. [Default: %default]', default = False)
    rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 20)
    rrbs_opts.add_option("-u", "--up", type= "int", dest="up_bound", help="upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: %default]", default = 500)
    rrbs_opts.add_option("-c", "--cut-site", type= "string", dest="cut_format", help="Cut sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", default = "C-CGG")
    parser.add_option_group(rrbs_opts)

    (options, args) = parser.parse_args()

    # if no options were given by the user, print help and exit
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    # The version banner is printed on every run; --version stops right after
    # it. Showing the version is a successful outcome, so exit with status 0.
    show_version()
    if options.version:
        sys.exit(0)

    rrbs = options.rrbs

    if options.filename is None:
        error("Please specify the genome file (Fasta) using \"-f\"")
    fasta_file = os.path.expanduser(options.filename)

    if not os.path.isfile(fasta_file):
        error('%s cannot be found' % fasta_file)

    if options.aligner not in supported_aligners:
        error('--aligner option should be: %s' % ', '.join(supported_aligners) + '.')

    # Name of the index-builder executable shipped with each aligner.
    builder_names = {BOWTIE  : 'bowtie-build',
                     BOWTIE2 : 'bowtie2-build',
                     SOAP    : '2bwt-builder',
                     RMAP    : ''  # do nothing
                    }
    builder_exec = os.path.join(options.aligner_path or aligner_path[options.aligner],
                                builder_names[options.aligner])

    # Argument template appended to the builder executable; the %(fname)s
    # placeholders are left unexpanded here and passed on as part of
    # build_command.
    builder_args = {BOWTIE  : ' -f %(fname)s.fa %(fname)s',
                    BOWTIE2 : ' -f %(fname)s.fa %(fname)s',
                    SOAP    : ' %(fname)s.fa'
                   }
    if options.aligner not in builder_args:
        # No template exists for this aligner (e.g. RMAP); report it cleanly
        # instead of dying with a bare KeyError.
        error('No index build command is defined for aligner %s' % options.aligner)
    build_command = builder_exec + builder_args[options.aligner]

    # Single-argument print(...) behaves the same as the print statement in
    # Python 2 and is also valid Python 3 syntax.
    print("Reference genome file: %s" % fasta_file)
    print("Reduced Representation Bisulfite Sequencing: %s" % rrbs)
    print("Short reads aligner you are using: %s" % options.aligner)
    print("Builder path: %s" % builder_exec)

    #---------------------------------------------------------------

    if not os.path.isfile( builder_exec ) :
        error("Cannot find program %s for execution." % builder_exec)

    ref_path = options.dbpath

    if os.path.exists(ref_path):
        if not os.path.isdir(ref_path):
            error("%s must be a directory. Please, delete it or change the -d option." % ref_path)
    else:
        # makedirs also creates missing parent directories of the -d path.
        os.makedirs(ref_path)

    if rrbs: # RRBS preprocessing
        rrbs_build(fasta_file, build_command, ref_path, options.low_bound, options.up_bound, options.aligner, options.cut_format)
    else: # Whole genome preprocessing
        wg_build(fasta_file, build_command, ref_path, options.aligner)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
96