Mercurial > repos > yutaka-saito > bsfcall
comparison bin/bsf-call @ 0:06f8460885ff
migrate from GitHub
author | yutaka-saito |
---|---|
date | Sun, 19 Apr 2015 20:51:13 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:06f8460885ff |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Bisulfighter::bsf-call | |
4 | |
5 Bisulfighter (http://epigenome.cbrc.jp/bisulfighter) | |
6 by National Institute of Advanced Industrial Science and Technology (AIST) | |
7 is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. | |
8 http://creativecommons.org/licenses/by-nc-sa/3.0/ | |
9 """ | |
10 | |
11 __version__= "1.3" | |
12 | |
13 from optparse import OptionParser | |
14 import os | |
15 import sys | |
16 import re | |
17 | |
18 prog = 'bsf-call' | |
19 usage = """%prog [options] refgenome read1 read2 ... | |
20 example: %prog -o experiment.txt hg38.fa paired-sample1-1.fastq,paired-sample1-2.fastq""" | |
21 description = "A mapping of the read bisulfite treated by LAST, to detect methylated cytosine (mC) of the results, and outputs the detection result to the file." | |
22 | |
23 op = OptionParser(prog=prog, usage=usage, description=description, version="%s-%s" % (prog, __version__)) | |
24 | |
25 op.add_option("-c", "--coverage", type="int", default=5, metavar="C", | |
26 help="threshold of read coverate (default: %default)") | |
27 | |
28 # op.add_option("-d", "--pe-direction", type="string", default="ff", | |
29 # help="direction of paired-end probes: ff, fr, rf, rr (default: %default)") | |
30 # | |
31 op.add_option("-l", "--lower-bound", type="float", default=0.05, metavar="L", | |
32 help="threshold of mC ratio (default: %default)") | |
33 | |
34 op.add_option("-p", "--multi-thread", type="int", default=1, metavar="P", | |
35 help="number of threads (default: %default)") | |
36 | |
37 op.add_option("-s", "", type="int", default=150, metavar="S", | |
38 help="threshold of the alignment score at filtering (default: %default)") | |
39 | |
40 op.add_option("-m", "", type="float", default=1e-9, metavar="M", | |
41 help="threshold of the mismap probability at filtering (default: %default)") | |
42 | |
43 # op.add_option("", "--last", type="string", default="", metavar="OPT1,OPT2,...", | |
44 # help="options for LAST (lastal command)") | |
45 # | |
46 op.add_option("-o", "", type="string", default="bsf-call.out", metavar="FILE", | |
47 help="output file (default: bsf-call.out)") | |
48 | |
49 op.add_option("-W", "", type="string", default="./bsfwork", metavar="WORKDIR", | |
50 help="work directory (default: ./bsfwork)") | |
51 | |
52 op.add_option("", "--work-auto", action="store_true", dest="auto_create_work_dir", default=False, | |
53 help="create work directory automatically") | |
54 | |
55 # op.add_option("-n", "", action="store_true", dest="use_cluster", default=False, | |
56 # help="run bsf-call on pc cluster") | |
57 # | |
58 # op.add_option("-q", "", type="string", default="", metavar="QUEUE_LIST", | |
59 # help="queue list") | |
60 # | |
61 op.add_option("-M", "", type="string", metavar="MAPPING_DIR", | |
62 help="mapping result directory") | |
63 | |
64 op.add_option("-T", "", type="string", metavar="LOCAL_DIR", | |
65 help="local directory") | |
66 | |
67 # op.add_option("-r", "", type="string", default="100M", metavar="SPLIT_READ_SIZE", | |
68 # help="split read size") | |
69 # | |
70 # op.add_option("", "--bam", action="store_true", dest="read_bam", default=False, | |
71 # help="read BAM file for mC detection") | |
72 # | |
73 # op.add_option("", "--sam", action="store_true", dest="read_sam", default=False, | |
74 # help="read SAM file for mC detection") | |
75 # | |
76 # op.add_option("-z", "--compress-prog", type="string", dest="z", metavar="COMPRESS_PROG", default="bzip2", | |
77 # help="compression program") | |
78 | |
79 options, args = op.parse_args() | |
80 | |
81 errors = [] | |
82 | |
83 work_dir = None | |
84 if options.W: | |
85 work_dir = options.W | |
86 else: | |
87 if not options.auto_create_work_dir: | |
88 work_dir = "bsfwork" | |
89 | |
90 if options.M: | |
91 if len(args) < 1: | |
92 op.error("\n Reference genome is not specified.") | |
93 | |
94 for result_dir in options.M.split(","): | |
95 if not os.path.exists(result_dir): | |
96 errors.append("Mapping result directory: '%s' does not exist." % options.M) | |
97 | |
98 # if options.read_bam and options.read_sam: | |
99 # errors.append("--bam and --sam cannot be placed simultaneously.") | |
100 | |
101 ref_genome = args[0] | |
102 reads = None | |
103 else: | |
104 if len(args) < 2: | |
105 op.error("\n Reference genome and read sequence is not specified.") | |
106 | |
107 # if options.read_bam: | |
108 # errors.append("--bam option is specified but -M option is not specified.") | |
109 | |
110 # if options.read_sam: | |
111 # errors.append("--sam option is specified but -M option is not specified.") | |
112 | |
113 ref_genome = args[0] | |
114 reads = args[1:] | |
115 | |
116 for read_files in reads: | |
117 for read_file in read_files.split(','): | |
118 if not os.path.exists(read_file): | |
119 errors.append("Read file: '%s' does not exists." % read_file) | |
120 | |
121 if work_dir and os.path.exists(work_dir): | |
122 errors.append("Working directory: '%s' already exists." % work_dir) | |
123 | |
124 if not os.path.exists(ref_genome): | |
125 errors.append("Reference genome: '%s' does not exists." % ref_genome) | |
126 | |
127 # if options.read_bam or options.read_sam: | |
128 # try: | |
129 # import pysam | |
130 # except: | |
131 # errors.append("--bam or --sam is specified but pysam is not installed.") | |
132 | |
133 if len(errors) > 0: | |
134 op.error("\n " + "\n ".join(errors)) | |
135 | |
136 cmd_opts = {} | |
137 cmd_opts["coverage"] = options.coverage | |
138 # cmd_opts["pe_direction"] = options.pe_direction | |
139 cmd_opts["num_threads"] = options.multi_thread | |
140 cmd_opts["lower_bound"] = options.lower_bound | |
141 cmd_opts["aln_score_thres"] = options.s | |
142 cmd_opts["aln_mismap_prob_thres"] = options.m | |
143 cmd_opts["output"] = options.o | |
144 # cmd_opts["last_opts"] = options.last | |
145 cmd_opts["work_dir"] = work_dir | |
146 # cmd_opts["use_cluster"] = options.use_cluster | |
147 # cmd_opts["queue_list"] = options.q | |
148 cmd_opts["mapping_dir"] = options.M | |
149 cmd_opts["local_dir"] = options.T | |
150 # cmd_opts["split_read_size"] = options.r | |
151 # cmd_opts["read_bam"] = options.read_bam | |
152 # cmd_opts["read_sam"] = options.read_sam | |
153 # cmd_opts["compress_prog"] = options.z | |
154 | |
155 try: | |
156 sys.path.append('.'); | |
157 import bsfcall | |
158 except ImportError: | |
159 errors.append("\"import bsfcall\" failed. Please be sure you have bsfcall.py in your python library path."); | |
160 | |
161 import subprocess | |
162 | |
163 # if not checkRunnable('last-map-probs'): | |
164 # sys.exit(1) | |
165 # if not checkRunnable('last-pair-probs'): | |
166 # sys.exit(1) | |
167 # if not checkRunnable(options.z): | |
168 # sys.exit(1) | |
169 | |
170 if len(errors) > 0: | |
171 op.error("\n " + "\n ".join(errors)) | |
172 | |
173 # if cmd_opts["use_cluster"]: | |
174 # cmd_opts["num_threads"] = 1 | |
175 # bsf_call = bsfcall.BsfCallCluster(ref_genome, reads, cmd_opts) | |
176 # else: | |
177 # bsf_call = bsfcall.BsfCall(ref_genome, reads, cmd_opts) | |
178 bsf_call = bsfcall.BsfCall(ref_genome, reads, cmd_opts) | |
179 | |
180 bsf_call.execute() | |
181 | |
182 sys.exit(0) | |
183 | |
184 def checkRunnable(cmd): | |
185 for outline, errline in runProcess(cmd.split()): | |
186 if (re.match(r'error: subProcess:', errline) is not None): | |
187 print >>sys.stderr, '\"%s\" is not found.' % cmd | |
188 return False | |
189 return True | |
190 | |
191 def runProcess(exe): | |
192 try: | |
193 p = subprocess.Popen(exe, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
194 except: | |
195 yield (None, 'error: runProcess: \'%s\' failed.' % ' '.join(exe)) | |
196 return | |
197 while (True): | |
198 retcode = p.poll() | |
199 if (retcode is not None): | |
200 break | |
201 outline = p.stdout.readline() | |
202 errline = p.stderr.readline() | |
203 yield (outline, errline) | |
204 |