Mercurial > repos > weilong-guo > bs_seeker2
comparison BSseeker2/galaxy/bs_seeker2_wrapper.py @ 0:e6df770c0e58 draft
Initial upload
author | weilong-guo |
---|---|
date | Fri, 12 Jul 2013 18:47:28 -0400 |
parents | |
children | 8b26adf64adc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e6df770c0e58 |
---|---|
1 import tempfile | |
2 | |
3 __author__ = 'pf' | |
4 from subprocess import Popen | |
5 from collections import defaultdict | |
6 import sys, shutil, os, re | |
7 | |
8 | |
9 | |
# Prefixes that route a wrapper option to one of the BS-Seeker2 scripts,
# e.g. "--align-i reads.fa" hands "-i reads.fa" to bs_seeker2-align.py.
BUILD = 'build'
ALIGN = 'align'
CALL_METHYLATION = 'call_methylation'

# Prefix for options consumed by the wrapper itself; "--exec-path" names the
# directory that holds the BS-Seeker2 scripts.
EXEC = 'exec'
EXEC_PATH = '%s-path' % EXEC

# Every prefix the command-line parser accepts.
ARG_TYPES = [
    BUILD,
    ALIGN,
    CALL_METHYLATION,
    EXEC,
]
17 | |
# Help text shown together with every command-line error.  Compared with the
# previous wording it fixes two typos, documents the --call_methylation prefix
# (the wrapper always runs bs_seeker2-call_methylation.py as its last step)
# and no longer claims that no temporary directory is created when no index
# is built: one is always created and handed to the aligner as --temp_dir.
USAGE = """
%(script)s is a wrapper script for bs_seeker2-build.py, bs_seeker2-align.py and bs_seeker2-call_methylation.py that is intended to be used with the Galaxy web platform.
The script takes command line parameters and runs bs_seeker2-build.py (if necessary), bs_seeker2-align.py and bs_seeker2-call_methylation.py.

The parameters that are related to bs_seeker2-build.py must be prefixed with --%(build_tag)s.
The parameters that are related to bs_seeker2-align.py must be prefixed with --%(align_tag)s.
The parameters that are related to bs_seeker2-call_methylation.py must be prefixed with --%(call_tag)s.
Additionally, the path to BS-Seeker2 has to be specified via the --%(exec_path)s option.

For example:

    python %(script)s --%(exec_path)s /mnt/Data/UCLA/Matteo/BS-Seeker --build-f data/arabidopsis/genome/Arabidopsis.fa --align-i data/arabidopsis/BS6_N1try2L7_seq.txt.fa --align-o data/arabidopsis/BS6_N1try2L7_seq.txt.fa.test_output

This will build the genome in Arabidopsis.fa and put the indexes in a temporary directory. bs_seeker2-align.py will be run on the
newly created genome index. I.e. the following two commands will be run in a shell:

    python /mnt/Data/UCLA/Matteo/BS-Seeker/bs_seeker2-build.py --db /tmp/tmpg8Eq1o -f /mnt/Data/UCLA/Matteo/bck_BS-Seeker/data/arabidopsis/genome/Arabidopsis.fa

    python /mnt/Data/UCLA/Matteo/BS-Seeker/bs_seeker2-align.py --db /tmp/tmpg8Eq1o -o /mnt/Data/UCLA/Matteo/bck_BS-Seeker/data/arabidopsis/BS6_N1try2L7_seq.txt.fa.test_output -i /mnt/Data/UCLA/Matteo/bck_BS-Seeker/data/arabidopsis/BS6_N1try2L7_seq.txt.fa -g Arabidopsis.fa


The temporary directory will be deleted after the wrapper exits.


If no options related to bs_seeker2-build are passed, no genome index will be built and the corresponding pre-built genome index will be used
instead. A temporary directory is still created for the aligner (passed as --temp_dir) and deleted after the wrapper exits.

""" % {
    'script': os.path.basename(__file__),
    'build_tag': BUILD,
    'align_tag': ALIGN,
    'call_tag': CALL_METHYLATION,
    'exec_path': EXEC_PATH,
}
45 | |
46 | |
def error(msg):
    """Report a fatal problem and terminate the wrapper.

    Writes ``ERROR: <msg>`` followed by a newline to standard error, then
    raises SystemExit with status 1, so this function never returns.

    msg -- text of the error; it is interpolated with ``%s``.
    """
    # sys.stderr.write produces the same bytes as the old ``print >>`` form
    # but is valid on both Python 2 and Python 3.
    sys.stderr.write('ERROR: %s\n' % msg)
    # The bare ``exit`` name is an interactive helper installed by the site
    # module and may be missing (python -S, frozen builds); sys.exit is not.
    sys.exit(1)
50 | |
51 | |
def getopt(h, k1, k2, default):
    """Return h[k1] if present, otherwise h[k2], otherwise default."""
    return h.get(k1, h.get(k2, default))


def parse_args(argv):
    """Group the wrapper's command-line words by the tool they belong to.

    An option has the form ``--<tool><flag>``: ``<tool>`` must be one of
    ARG_TYPES and ``<flag>`` (which must start with ``-``) is the option that
    is handed to that tool, e.g. ``--align-i`` or ``--build--db``.  A word
    that does not start with ``--`` becomes the value of the preceding
    option; an option without a value maps to ''.  If an option is followed
    by several values, the last one wins.

    argv -- the command-line words, without the script name.

    Returns a defaultdict mapping tool name -> {flag: value}.  Malformed
    input is reported through error(), which terminates the process.
    """
    args = defaultdict(dict)
    arg_type = None
    arg_key = None

    for arg in argv:
        if arg.startswith('--'):
            match = re.match(r'--(\w+)(.*)', arg)
            # Words such as '--' or '---x' carry no tool name; treat them as
            # an unknown tool instead of failing on match.groups().
            arg_type, arg_key = match.groups() if match else ('', '')
            if arg_type not in ARG_TYPES:
                error("Bad argument: %s. arg_type (%s) must be one of: %s."
                      % (arg, arg_type, ', '.join(ARG_TYPES)) + '\n\n' + USAGE)
            if not arg_key or arg_key[0] != '-':
                error("Bad argument: %s. arg_key (%s) must start with - or --."
                      % (arg, arg_key) + '\n\n' + USAGE)
            args[arg_type][arg_key] = ''
        elif arg_type is None:
            # A value before any option has nothing to attach to.
            error("Bad argument: %s. A value must follow an option." % arg
                  + '\n\n' + USAGE)
        else:
            args[arg_type][arg_key] = arg

    return args


def build_argv(prog, params):
    """Return the argument vector that runs the script `prog` with `params`.

    prog   -- path of the script; a relative path is resolved against the
              directory that contains this wrapper.
    params -- mapping flag -> value; a flag whose value is '' is emitted
              without a value.

    The result is a list meant to be executed without a shell, so values
    that contain spaces or shell metacharacters reach the script unchanged.
    """
    argv = ['python', os.path.join(os.path.dirname(__file__), prog)]
    for flag, value in params.items():
        argv.append(flag)
        if value != '':
            argv.append(value)
    return argv


def run_prog(prog, params):
    """Run `prog` with `params` and wait for it to finish.

    The command is echoed to standard output first.  If the process cannot
    be started or exits with a non-zero status, error() is called, which
    terminates the wrapper with exit status 1.
    """
    argv = build_argv(prog, params)
    print('exec: %s' % ' '.join(argv))
    # Flush so the echoed command precedes the child's own output when
    # standard output is a pipe or a file.
    sys.stdout.flush()

    try:
        return_code = Popen(args=argv).wait()
    except OSError as e:
        error("%s could not be started: %s" % (prog, e))
    if return_code:
        error("%s exit with error code %d" % (prog, return_code))


def methylation_db(align_args):
    """Return the index directory to pass to bs_seeker2-call_methylation.py.

    The path is ``<db>/<genome file name>[_rrbs_<low>_<up>]_<aligner>``,
    built from the options given to bs_seeker2-align.py.  The ``_rrbs_``
    part is added only when -r/--rrbs is present; <low> and <up> fall back
    to '40' and '500'.

    align_args -- mapping flag -> value of the align options.

    A missing db, genome or aligner option is reported through error().
    """
    db = getopt(align_args, '--db', '-d', None)
    genome = getopt(align_args, '-g', '--genome', None)
    aligner = align_args.get('--aligner')
    if not db:
        error('The genome index directory must be given via --%s--db '
              '(or built via --%s options)' % (ALIGN, BUILD))
    if not genome:
        error('The genome must be given via --%s-g or --%s--genome'
              % (ALIGN, ALIGN))
    if not aligner:
        error('The aligner must be given via --%s--aligner' % ALIGN)

    rrbs = ''
    if '-r' in align_args or '--rrbs' in align_args:
        rrbs = '_rrbs_%s_%s' % (getopt(align_args, '-l', '--low', '40'),
                                getopt(align_args, '-u', '--up', '500'))
    return os.path.join(db, os.path.basename(genome) + rrbs + '_' + aligner)


if __name__ == '__main__':
    if len(sys.argv) == 1:
        error('No parameters\n\n' + USAGE)

    # Parse command line arguments
    args = parse_args(sys.argv[1:])

    path_to_bs_seeker = args.get(EXEC, {}).get('-path')
    if not path_to_bs_seeker:
        error('You have to specify the path to BS-Seeker2 via --%s\n\n' % EXEC_PATH + USAGE)

    tempdir = tempfile.mkdtemp()
    try:
        # A freshly built index lives in the temporary directory.
        if BUILD in args:
            args[BUILD]['--db'] = tempdir
            args[ALIGN]['--db'] = tempdir
        args[ALIGN]['--temp_dir'] = tempdir

        # Work out the call_methylation options before anything is run, so
        # a missing option is reported at once and not after the alignment.
        output = getopt(args[ALIGN], '--output', '-o', None)
        if not output:
            error('The alignment output must be given via --%s-o or --%s--output'
                  % (ALIGN, ALIGN))
        args[CALL_METHYLATION].update({
            '-i': output,
            '--db': methylation_db(args[ALIGN]),
        })

        # bs_seeker2-build
        if BUILD in args:
            run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-build.py'), args[BUILD])

        # bs_seeker2-align
        run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-align.py'), args[ALIGN])

        # bs_seeker2-call_methylation
        run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-call_methylation.py'), args[CALL_METHYLATION])
    finally:
        # Runs on success, on error() (SystemExit) and on any other
        # exception, so the temporary directory is never left behind.
        shutil.rmtree(tempdir)