0
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Bisulfighter::bsf-call
|
|
4
|
|
5 Bisulfighter (http://epigenome.cbrc.jp/bisulfighter)
|
|
6 by National Institute of Advanced Industrial Science and Technology (AIST)
|
|
7 is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
|
|
8 http://creativecommons.org/licenses/by-nc-sa/3.0/
|
|
9 """
|
|
10
|
|
11 __version__= "1.3"
|
|
12
|
|
13 from optparse import OptionParser
|
|
14 import os
|
|
15 import sys
|
|
16 import re
|
|
17
|
|
18 prog = 'bsf-call'
|
|
19 usage = """%prog [options] refgenome read1 read2 ...
|
|
20 example: %prog -o experiment.txt hg38.fa paired-sample1-1.fastq,paired-sample1-2.fastq"""
|
|
21 description = "A mapping of the read bisulfite treated by LAST, to detect methylated cytosine (mC) of the results, and outputs the detection result to the file."
|
|
22
|
|
23 op = OptionParser(prog=prog, usage=usage, description=description, version="%s-%s" % (prog, __version__))
|
|
24
|
|
25 op.add_option("-c", "--coverage", type="int", default=5, metavar="C",
|
|
26 help="threshold of read coverate (default: %default)")
|
|
27
|
|
28 # op.add_option("-d", "--pe-direction", type="string", default="ff",
|
|
29 # help="direction of paired-end probes: ff, fr, rf, rr (default: %default)")
|
|
30 #
|
|
31 op.add_option("-l", "--lower-bound", type="float", default=0.05, metavar="L",
|
|
32 help="threshold of mC ratio (default: %default)")
|
|
33
|
|
34 op.add_option("-p", "--multi-thread", type="int", default=1, metavar="P",
|
|
35 help="number of threads (default: %default)")
|
|
36
|
|
37 op.add_option("-s", "", type="int", default=150, metavar="S",
|
|
38 help="threshold of the alignment score at filtering (default: %default)")
|
|
39
|
|
40 op.add_option("-m", "", type="float", default=1e-9, metavar="M",
|
|
41 help="threshold of the mismap probability at filtering (default: %default)")
|
|
42
|
|
43 # op.add_option("", "--last", type="string", default="", metavar="OPT1,OPT2,...",
|
|
44 # help="options for LAST (lastal command)")
|
|
45 #
|
|
46 op.add_option("-o", "", type="string", default="bsf-call.out", metavar="FILE",
|
|
47 help="output file (default: bsf-call.out)")
|
|
48
|
|
49 op.add_option("-W", "", type="string", default="./bsfwork", metavar="WORKDIR",
|
|
50 help="work directory (default: ./bsfwork)")
|
|
51
|
|
52 op.add_option("", "--work-auto", action="store_true", dest="auto_create_work_dir", default=False,
|
|
53 help="create work directory automatically")
|
|
54
|
|
55 # op.add_option("-n", "", action="store_true", dest="use_cluster", default=False,
|
|
56 # help="run bsf-call on pc cluster")
|
|
57 #
|
|
58 # op.add_option("-q", "", type="string", default="", metavar="QUEUE_LIST",
|
|
59 # help="queue list")
|
|
60 #
|
|
61 op.add_option("-M", "", type="string", metavar="MAPPING_DIR",
|
|
62 help="mapping result directory")
|
|
63
|
|
64 op.add_option("-T", "", type="string", metavar="LOCAL_DIR",
|
|
65 help="local directory")
|
|
66
|
|
67 # op.add_option("-r", "", type="string", default="100M", metavar="SPLIT_READ_SIZE",
|
|
68 # help="split read size")
|
|
69 #
|
|
70 # op.add_option("", "--bam", action="store_true", dest="read_bam", default=False,
|
|
71 # help="read BAM file for mC detection")
|
|
72 #
|
|
73 # op.add_option("", "--sam", action="store_true", dest="read_sam", default=False,
|
|
74 # help="read SAM file for mC detection")
|
|
75 #
|
|
76 # op.add_option("-z", "--compress-prog", type="string", dest="z", metavar="COMPRESS_PROG", default="bzip2",
|
|
77 # help="compression program")
|
|
78
|
|
79 options, args = op.parse_args()
|
|
80
|
|
81 errors = []
|
|
82
|
|
83 work_dir = None
|
|
84 if options.W:
|
|
85 work_dir = options.W
|
|
86 else:
|
|
87 if not options.auto_create_work_dir:
|
|
88 work_dir = "bsfwork"
|
|
89
|
|
90 if options.M:
|
|
91 if len(args) < 1:
|
|
92 op.error("\n Reference genome is not specified.")
|
|
93
|
|
94 for result_dir in options.M.split(","):
|
|
95 if not os.path.exists(result_dir):
|
|
96 errors.append("Mapping result directory: '%s' does not exist." % options.M)
|
|
97
|
|
98 # if options.read_bam and options.read_sam:
|
|
99 # errors.append("--bam and --sam cannot be placed simultaneously.")
|
|
100
|
|
101 ref_genome = args[0]
|
|
102 reads = None
|
|
103 else:
|
|
104 if len(args) < 2:
|
|
105 op.error("\n Reference genome and read sequence is not specified.")
|
|
106
|
|
107 # if options.read_bam:
|
|
108 # errors.append("--bam option is specified but -M option is not specified.")
|
|
109
|
|
110 # if options.read_sam:
|
|
111 # errors.append("--sam option is specified but -M option is not specified.")
|
|
112
|
|
113 ref_genome = args[0]
|
|
114 reads = args[1:]
|
|
115
|
|
116 for read_files in reads:
|
|
117 for read_file in read_files.split(','):
|
|
118 if not os.path.exists(read_file):
|
|
119 errors.append("Read file: '%s' does not exists." % read_file)
|
|
120
|
|
121 if work_dir and os.path.exists(work_dir):
|
|
122 errors.append("Working directory: '%s' already exists." % work_dir)
|
|
123
|
|
124 if not os.path.exists(ref_genome):
|
|
125 errors.append("Reference genome: '%s' does not exists." % ref_genome)
|
|
126
|
|
127 # if options.read_bam or options.read_sam:
|
|
128 # try:
|
|
129 # import pysam
|
|
130 # except:
|
|
131 # errors.append("--bam or --sam is specified but pysam is not installed.")
|
|
132
|
|
133 if len(errors) > 0:
|
|
134 op.error("\n " + "\n ".join(errors))
|
|
135
|
|
136 cmd_opts = {}
|
|
137 cmd_opts["coverage"] = options.coverage
|
|
138 # cmd_opts["pe_direction"] = options.pe_direction
|
|
139 cmd_opts["num_threads"] = options.multi_thread
|
|
140 cmd_opts["lower_bound"] = options.lower_bound
|
|
141 cmd_opts["aln_score_thres"] = options.s
|
|
142 cmd_opts["aln_mismap_prob_thres"] = options.m
|
|
143 cmd_opts["output"] = options.o
|
|
144 # cmd_opts["last_opts"] = options.last
|
|
145 cmd_opts["work_dir"] = work_dir
|
|
146 # cmd_opts["use_cluster"] = options.use_cluster
|
|
147 # cmd_opts["queue_list"] = options.q
|
|
148 cmd_opts["mapping_dir"] = options.M
|
|
149 cmd_opts["local_dir"] = options.T
|
|
150 # cmd_opts["split_read_size"] = options.r
|
|
151 # cmd_opts["read_bam"] = options.read_bam
|
|
152 # cmd_opts["read_sam"] = options.read_sam
|
|
153 # cmd_opts["compress_prog"] = options.z
|
|
154
|
|
155 try:
|
|
156 sys.path.append('.');
|
|
157 import bsfcall
|
|
158 except ImportError:
|
|
159 errors.append("\"import bsfcall\" failed. Please be sure you have bsfcall.py in your python library path.");
|
|
160
|
|
161 import subprocess
|
|
162
|
|
163 # if not checkRunnable('last-map-probs'):
|
|
164 # sys.exit(1)
|
|
165 # if not checkRunnable('last-pair-probs'):
|
|
166 # sys.exit(1)
|
|
167 # if not checkRunnable(options.z):
|
|
168 # sys.exit(1)
|
|
169
|
|
170 if len(errors) > 0:
|
|
171 op.error("\n " + "\n ".join(errors))
|
|
172
|
|
173 # if cmd_opts["use_cluster"]:
|
|
174 # cmd_opts["num_threads"] = 1
|
|
175 # bsf_call = bsfcall.BsfCallCluster(ref_genome, reads, cmd_opts)
|
|
176 # else:
|
|
177 # bsf_call = bsfcall.BsfCall(ref_genome, reads, cmd_opts)
|
|
178 bsf_call = bsfcall.BsfCall(ref_genome, reads, cmd_opts)
|
|
179
|
|
180 bsf_call.execute()
|
|
181
|
|
182 sys.exit(0)
|
|
183
|
|
184 def checkRunnable(cmd):
|
|
185 for outline, errline in runProcess(cmd.split()):
|
|
186 if (re.match(r'error: subProcess:', errline) is not None):
|
|
187 print >>sys.stderr, '\"%s\" is not found.' % cmd
|
|
188 return False
|
|
189 return True
|
|
190
|
|
191 def runProcess(exe):
|
|
192 try:
|
|
193 p = subprocess.Popen(exe, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
194 except:
|
|
195 yield (None, 'error: runProcess: \'%s\' failed.' % ' '.join(exe))
|
|
196 return
|
|
197 while (True):
|
|
198 retcode = p.poll()
|
|
199 if (retcode is not None):
|
|
200 break
|
|
201 outline = p.stdout.readline()
|
|
202 errline = p.stderr.readline()
|
|
203 yield (outline, errline)
|
|
204
|