annotate BSseeker2/galaxy/bs_seeker2_wrapper.py @ 1:8b26adf64adc draft default tip

V2.0.5
author weilong-guo
date Tue, 05 Nov 2013 01:55:39 -0500
parents e6df770c0e58
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1 import tempfile
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
2
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
3 __author__ = 'pf'
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
4 from subprocess import Popen
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
5 from collections import defaultdict
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
6 import sys, shutil, os, re
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
7
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
8
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
9
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
# Prefixes that route a wrapper option to one of the wrapped BS-Seeker2
# scripts, e.g. ``--align-i reads.fa`` hands ``-i reads.fa`` to the aligner.
BUILD = 'build'
ALIGN = 'align'
CALL_METHYLATION = 'call_methylation'

# Prefix for options consumed by the wrapper itself; ``--exec-path`` is the
# only one the wrapper reads.
EXEC = 'exec'
EXEC_PATH = EXEC+'-path'

# Every prefix accepted on the command line.
ARG_TYPES = [BUILD, ALIGN, CALL_METHYLATION, EXEC]

# Help text printed together with every command line error.
USAGE = """
%(script)s is a wrapper script for bs_seeker2-build.py, bs_seeker2-align.py and bs_seeker2-call_methylation.py
that is intended to be used with the Galaxy web platform.
The script takes command line parameters and runs bs_seeker2-build.py (if necessary), bs_seeker2-align.py and
then bs_seeker2-call_methylation.py.

The parameters that are related to bs_seeker2-build.py must be prefixed with --%(build_tag)s.
The parameters that are related to bs_seeker2-align.py must be prefixed with --%(align_tag)s.
The parameters that are related to bs_seeker2-call_methylation.py must be prefixed with --%(call_tag)s.
Additionally, the path to BS-Seeker2 has to be specified via the --%(exec_path)s option.

For example:

    python %(script)s --%(exec_path)s /mnt/Data/UCLA/Matteo/BS-Seeker --build-f data/arabidopsis/genome/Arabidopsis.fa --align-i data/arabidopsis/BS6_N1try2L7_seq.txt.fa --align-o data/arabidopsis/BS6_N1try2L7_seq.txt.fa.test_output

This will build the genome in Arabidopsis.fa and put the indexes in a temporary directory. bs_seeker2-align.py will be run on the
newly created genome index. I.e. the following two commands will be run in a shell:

    python /mnt/Data/UCLA/Matteo/BS-Seeker/bs_seeker2-build.py --db /tmp/tmpg8Eq1o -f /mnt/Data/UCLA/Matteo/bck_BS-Seeker/data/arabidopsis/genome/Arabidopsis.fa

    python /mnt/Data/UCLA/Matteo/BS-Seeker/bs_seeker2-align.py --db /tmp/tmpg8Eq1o -o /mnt/Data/UCLA/Matteo/bck_BS-Seeker/data/arabidopsis/BS6_N1try2L7_seq.txt.fa.test_output -i /mnt/Data/UCLA/Matteo/bck_BS-Seeker/data/arabidopsis/BS6_N1try2L7_seq.txt.fa -g Arabidopsis.fa


The temporary directory will be deleted after the wrapper exits.


If no options related to bs_seeker2-build are passed, no genome index will be built and the corresponding pre-built genome index will be used
instead. A temporary directory is still created and handed to bs_seeker2-align.py via --temp_dir; it is deleted after the wrapper exits.

""" % { 'script' : os.path.split(__file__)[1], 'build_tag' :BUILD, 'align_tag' : ALIGN, 'call_tag' : CALL_METHYLATION, 'exec_path' : EXEC_PATH }
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
45
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
46
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
def error(msg):
    """Report a fatal problem and terminate the wrapper.

    Writes ``ERROR: <msg>`` followed by a newline to standard error and
    exits with status 1; this function never returns.

    msg -- text placed after the ``ERROR: `` prefix.
    """
    sys.stderr.write('ERROR: %s\n' % msg)
    # sys.exit() rather than the bare exit(): the latter is a helper that the
    # site module injects for interactive use and is missing under
    # ``python -S``.
    sys.exit(1)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
50
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
51
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
52 if __name__ == '__main__':
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
53 if len(sys.argv) == 1:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
54 error('No parameters\n\n'+USAGE)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
55
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
56
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
57 # Parse command line arguments
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
58 args = defaultdict(dict)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
59 arg_key = None
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
60 arg_val = None
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
61 arg_type = None
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
62
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
63 for arg in sys.argv[1:]:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
64 if arg.startswith('--'):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
65 try:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
66 arg_type, arg_key = re.match(r'--(\w+)(.*)', arg).groups()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
67 if arg_type not in ARG_TYPES:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
68 raise Exception("Bad argument: %s. arg_type (%s) must be one of: %s." % (arg, arg_type, ', '.join(ARG_TYPES)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
69 if not arg_key or arg_key[0] != '-':
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
70 raise Exception("Bad argument: %s. arg_key (%s) must start with - or --." % (arg, arg_key))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
71 except Exception, e:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
72 error(str(e) + '\n\n' + USAGE)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
73 args[arg_type][arg_key] = ''
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
74 else:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
75 args[arg_type][arg_key] = arg
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
76
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
77 path_to_bs_seeker = args.get('exec', {'-path' : None})['-path'] # return None when exec not found
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
78 if path_to_bs_seeker is None:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
79 error('You have to specify the path to BS-Seeker2 via --%s\n\n' % EXEC_PATH + USAGE)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
80
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
81 tempdir = None
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
82 def run_prog(prog, params):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
83 cwd, _ = os.path.split(__file__)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
84 cmd = 'python %(prog)s %(params)s' % {
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
85 'prog' : os.path.join(cwd, prog),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
86 'params' : ' '.join('%s %s' % (arg_key, arg_val) for arg_key, arg_val in params.items())
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
87 }
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
88 print 'exec:', cmd
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
89
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
90 return_code = Popen(args = cmd, shell = True).wait()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
91 if return_code:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
92 if tempdir:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
93 shutil.rmtree(tempdir)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
94 error("%s exit with error code %d" % (prog, return_code))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
95 tempdir = tempfile.mkdtemp()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
96
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
97 # bs_seeker2-build
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
98 if BUILD in args:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
99 args[BUILD]['--db'] = tempdir
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
100 args[ALIGN]['--db'] = tempdir
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
101 run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-build.py'), args[BUILD])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
102
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
103 # bs_seeker2-align
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
104 args[ALIGN]['--temp_dir'] = tempdir
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
105 run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-align.py'), args[ALIGN])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
106
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
107
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
108 def getopt(h, k1, k2, default):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
109 return h.get(k1, h.get(k2, default))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
110 # bs_seeker2-call_methylation
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
111 args[CALL_METHYLATION].update({ '-i' : args[ALIGN]['--output'],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
112 '--db' : os.path.join(args[ALIGN]['--db'],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
113 os.path.split( getopt(args[ALIGN],'-g', '--genome', None))[1] +
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
114 ('_rrbs_%s_%s' % (getopt(args[ALIGN], '-l', '--low', '40'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
115 getopt(args[ALIGN], '-u', '--up', '500'))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
116 if len(set(['-r', '--rrbs']) & set(args[ALIGN])) > 0 else '') +
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
117 '_' + args[ALIGN]['--aligner'])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
118 })
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
119 run_prog(os.path.join(path_to_bs_seeker, 'bs_seeker2-call_methylation.py'), args[CALL_METHYLATION])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
120
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
121 if tempdir:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
122 shutil.rmtree(tempdir)