comparison STACKS_refmap.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 import os
7 import tempfile
8 import shutil
9 import subprocess
10 import glob
11 import optparse
12 from os.path import basename
13 import zipfile
14 import tarfile
15 import gzip
16 from galaxy.datatypes.checkers import *
17 from stacks import *
18
19
def _stage_input_files(config_path, workdir='inputs'):
    """Prepare the input files listed in one Galaxy config file.

    Runs the four helper steps from the ``stacks`` module in the order this
    wrapper has always used.  The helpers are defined outside this file; the
    step descriptions below restate the wrapper's original comments and
    should be confirmed against ``stacks.py``.

    :param config_path: path of the Galaxy-generated config file.
    :param workdir: directory that receives the prepared inputs.
    :returns: the table produced by ``galaxy_config_to_tabfiles_for_STACKS``;
        iterating it yields the file names to look up under ``workdir``.
    """
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_path)

    # unpack compressed entries; the helper updates the table content
    extract_compress_files_from_tabfiles(tab_files, workdir)

    # inputs must carry a .sam extension
    check_sam_extension_and_add(tab_files, workdir)

    # expose the files inside the working directory through symlinks
    create_symlinks_from_tabfiles(tab_files, workdir)

    return tab_files


def _zip_files(archive_path, file_names):
    """Store ``file_names`` (relative to the cwd) in a new zip archive.

    The archive is always closed before returning: ``ZipFile.close()`` is
    what writes the central directory, so an archive moved or copied while
    still open may be unreadable.
    """
    archive = zipfile.ZipFile(archive_path, 'w', allowZip64=True)
    try:
        for name in file_names:
            archive.write(os.path.basename(name))
    finally:
        archive.close()


def __main__():
    """Galaxy wrapper around the Stacks ``ref_map.pl`` pipeline.

    Steps, in order:

    1. parse the command-line options handed over by Galaxy;
    2. create the ``inputs``, ``job_outputs`` and ``galaxy_outputs``
       directories in the current working directory;
    3. stage the parent (``-p``), progeny (``-r``) and individual (``-s``)
       files and add them to the ``ref_map.pl`` command line;
    4. run ``ref_map.pl`` and echo its stdout and stderr on stdout;
    5. move ``ref_map.log`` to ``--logfile``, copy the three catalog files
       to their datasets, then deliver the remaining files according to
       ``--compress_output`` (``total``: one zip, ``categories``: one zip
       per file type, anything else: move everything to
       ``galaxy_outputs``).
    """

    # arguments retrieval

    parser = optparse.OptionParser()
    for flag in ('-p', '-r', '-s', '-O', '-n', '-m',
                 '--bound_low', '--bound_high', '--alpha',
                 '--logfile', '--compress_output',
                 '--catalogsnps', '--catalogalleles', '--catalogtags',
                 # additional outputs
                 '--total_output', '--tags_output', '--snps_output',
                 '--alleles_output', '--matches_output'):
        parser.add_option(flag)
    (options, args) = parser.parse_args()

    # create working directories

    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    cmd_line = ['ref_map.pl']

    # if genetic map: parent files

    if options.p:
        for key in _stage_input_files(options.p):
            # keep plain files only (skip directories created by a
            # decompression)
            if os.path.isfile('inputs/' + key):
                cmd_line.extend(['-p', os.path.normpath('inputs/' + key)])

    # if genetic map with progeny files

    if options.r:
        for key in _stage_input_files(options.r):
            # keep plain files only (skip directories created by a
            # decompression)
            if os.path.isfile('inputs/' + key):
                cmd_line.extend(['-r', 'inputs/' + key])

    # if individual files are selected; every entry is passed on as-is

    if options.s:
        for key in _stage_input_files(options.s):
            cmd_line.extend(['-s', 'inputs/' + key])

    # create the options command line

    cmd_line.extend([
        '-S',
        '-b', '1',
        '-T', '4',
        '-o', 'job_outputs',
    ])

    # -n / -m are only forwarded when supplied: a missing value used to put
    # None in the argument list and crash the join below.
    if options.n is not None:
        cmd_line.extend(['-n', options.n])

    if options.m is not None:
        cmd_line.extend(['-m', options.m])

    if options.O:
        cmd_line.extend(['-O', options.O])

    if options.bound_low:
        cmd_line.extend(['--bound_low', options.bound_low])

    if options.bound_high:
        cmd_line.extend(['--bound_high', options.bound_high])

    if options.alpha:
        cmd_line.extend(['--alpha', options.alpha])

    # execute job

    print('[COMMAND LINE]' + ' '.join(cmd_line))

    process = subprocess.Popen(cmd_line, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    (stdoutput, stderror) = process.communicate()

    # both streams are echoed on stdout, as the wrapper always did
    print(stdoutput)
    print(stderror)

    # postprocesses

    try:
        shutil.move('job_outputs/ref_map.log', options.logfile)
    except Exception:
        # Best effort, as before: a missing log is reported and the
        # post-processing continues.  Unlike a bare ``except:`` this no
        # longer swallows KeyboardInterrupt / SystemExit.
        sys.stderr.write('Error in ref_map execution; Please read the additional output (stdout)\n')

    # go inside the outputs dir

    os.chdir('job_outputs')

    # copy the catalog files to their dedicated datasets

    for name in glob.glob('*'):
        if name.endswith('catalog.snps.tsv'):
            shutil.copy(name, options.catalogsnps)
        if name.endswith('catalog.alleles.tsv'):
            shutil.copy(name, options.catalogalleles)
        if name.endswith('catalog.tags.tsv'):
            shutil.copy(name, options.catalogtags)

    # snapshot of the job outputs, taken before any archive is created here

    list_files = glob.glob('*')

    if options.compress_output == 'total':

        # one archive holding every output file

        _zip_files('total.zip.temp', list_files)

        # return the unique archive

        shutil.move('total.zip.temp', options.total_output)

    elif options.compress_output == 'categories':

        # one archive per file type: (file suffix, temp archive, dataset)

        categories = [
            ('tags.tsv', 'tags.zip.temp', options.tags_output),
            ('snps.tsv', 'snps.zip.temp', options.snps_output),
            ('alleles.tsv', 'alleles.zip.temp', options.alleles_output),
            ('matches.tsv', 'matches.zip.temp', options.matches_output),
        ]

        archives = []
        try:
            for suffix, temp_name, _ in categories:
                archives.append(
                    (suffix,
                     zipfile.ZipFile(temp_name, 'w', allowZip64=True)))

            for name in list_files:
                for suffix, archive in archives:
                    # files whose name contains 'batch' are never archived
                    if name.endswith(suffix) and 'batch' not in name:
                        archive.write(os.path.basename(name))
                        os.remove(name)
                        break
                else:
                    # not archived: hand the file over unchanged
                    shutil.move(os.path.basename(name), '../galaxy_outputs')
        finally:
            # close before moving, otherwise the central directory of the
            # archives may be missing from the delivered files
            for _, archive in archives:
                archive.close()

        # return archives

        for _, temp_name, destination in categories:
            shutil.move(temp_name, destination)

    else:

        # else no compression

        for name in list_files:
            shutil.move(os.path.basename(name), '../galaxy_outputs')
254
# Run the wrapper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    __main__()
257
258