0
|
1 #!/usr/bin/python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 import sys
|
|
5 import re
|
|
6 import os
|
|
7 import tempfile
|
|
8 import shutil
|
|
9 import subprocess
|
|
10 import glob
|
|
11 import optparse
|
|
12 from os.path import basename
|
|
13 import zipfile
|
|
14 import tarfile
|
|
15 import gzip
|
|
16 from galaxy.datatypes.checkers import *
|
|
17 from stacks import *
|
|
18
|
|
19
|
|
def __main__():
    """Run ``ref_map.pl`` on the files named on the command line and collect its outputs.

    Steps, in order:

    1. Parse the command-line options (all are plain string options).
    2. Create the ``inputs``, ``job_outputs`` and ``galaxy_outputs``
       directories in the current working directory.
    3. Stage the files described by ``-p``, ``-r`` and ``-s`` under
       ``inputs`` and add them to the ``ref_map.pl`` command line.
    4. Run ``ref_map.pl`` and echo its stdout, then its stderr, to stdout.
    5. Move ``job_outputs/ref_map.log`` to ``--logfile``; on failure an
       error message is written to stderr and processing continues.
    6. Change directory to ``job_outputs``, copy the catalog files to
       ``--catalogsnps`` / ``--catalogalleles`` / ``--catalogtags``, then
       dispatch every produced file according to ``--compress_output``:

       * ``total``: one zip of everything, moved to ``--total_output``;
       * ``categories``: one zip per kind (tags, snps, alleles, matches)
         for files whose name does not contain ``batch``; the zipped files
         are deleted and everything else is moved to ``../galaxy_outputs``;
       * anything else: every file is moved to ``../galaxy_outputs``.

    Side effects: creates directories, changes the process working
    directory, spawns a subprocess and moves/copies/deletes files.
    """
    # Retrieve the arguments.
    parser = optparse.OptionParser()
    option_flags = (
        '-p', '-r', '-s', '-O', '-n', '-m',
        '--bound_low', '--bound_high', '--alpha',
        '--logfile', '--compress_output',
        '--catalogsnps', '--catalogalleles', '--catalogtags',
        # Additional outputs.
        '--total_output', '--tags_output', '--snps_output',
        '--alleles_output', '--matches_output',
    )
    for flag in option_flags:
        parser.add_option(flag)
    options, _ = parser.parse_args()

    # Create the working directories (os.mkdir fails if one already exists).
    for directory in ('inputs', 'job_outputs', 'galaxy_outputs'):
        os.mkdir(directory)

    cmd_line = ['ref_map.pl']

    # NOTE(review): the original indentation was lost; the three input
    # sections below are assumed to be siblings, not nested -- confirm.

    # Genetic map: parent files. Only regular files are passed on, which
    # skips directories created by a decompression.
    if options.p:
        for key in _stage_inputs(options.p):
            path = 'inputs/' + key
            if os.path.isfile(path):
                cmd_line.extend(['-p', os.path.normpath(path)])

    # Genetic map: progeny files, with the same regular-file filter.
    if options.r:
        for key in _stage_inputs(options.r):
            path = 'inputs/' + key
            if os.path.isfile(path):
                cmd_line.extend(['-r', path])

    # Individual files: every entry is passed on, without a file check.
    if options.s:
        for key in _stage_inputs(options.s):
            cmd_line.extend(['-s', 'inputs/' + key])

    # Fixed part of the option command line.
    cmd_line.extend(['-S', '-b', '1', '-T', '4', '-o', 'job_outputs'])

    # -n / -m are forwarded whenever they were given (even if empty); a
    # missing value is skipped because None cannot be placed in an argv list.
    for flag, value in (('-n', options.n), ('-m', options.m)):
        if value is not None:
            cmd_line.extend([flag, value])

    # Remaining options are forwarded only when non-empty.
    optional_values = (
        ('-O', options.O),
        ('--bound_low', options.bound_low),
        ('--bound_high', options.bound_high),
        ('--alpha', options.alpha),
    )
    for flag, value in optional_values:
        if value:
            cmd_line.extend([flag, value])

    # Execute the job and echo both of its streams to stdout.
    sys.stdout.write('[COMMAND LINE]' + ' '.join(cmd_line) + '\n')
    process = subprocess.Popen(cmd_line, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    stdoutput, stderror = process.communicate()
    sys.stdout.write(_to_text(stdoutput) + '\n')
    sys.stdout.write(_to_text(stderror) + '\n')

    # Post-processing. The log move is best effort: its failure is reported
    # on stderr but does not stop the collection of the other outputs.
    # EnvironmentError covers IOError/OSError/shutil.Error; TypeError covers
    # a missing --logfile value.
    try:
        shutil.move('job_outputs/ref_map.log', options.logfile)
    except (EnvironmentError, TypeError):
        sys.stderr.write('Error in ref_map execution; Please read the additional output (stdout)\n')

    # Go inside the outputs directory.
    os.chdir('job_outputs')

    # Copy the catalog files to their dedicated outputs. The suffixes are
    # matched literally (the dots are real dots, not regex wildcards).
    catalog_targets = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for name in glob.glob('*'):
        for suffix, destination in catalog_targets:
            if name.endswith(suffix):
                shutil.copy(name, destination)

    # Snapshot of the produced files, taken before any archive is created so
    # that the temporary zip files are never part of the list.
    list_files = glob.glob('*')

    if options.compress_output == 'total':
        # One archive holding every produced file.
        _write_zip('total.zip.temp', list_files)
        shutil.move('total.zip.temp', options.total_output)
    elif options.compress_output == 'categories':
        # One archive per kind of file.
        categories = (
            ('tags.tsv', 'tags.zip.temp', options.tags_output),
            ('snps.tsv', 'snps.zip.temp', options.snps_output),
            ('alleles.tsv', 'alleles.zip.temp', options.alleles_output),
            ('matches.tsv', 'matches.zip.temp', options.matches_output),
        )
        members = {}
        leftovers = []
        for name in list_files:
            for suffix, _temp_name, _destination in categories:
                # Files with 'batch' in their name are never archived.
                if name.endswith(suffix) and 'batch' not in name:
                    members.setdefault(suffix, []).append(name)
                    break
            else:
                leftovers.append(name)

        for suffix, temp_name, destination in categories:
            archived = members.get(suffix, [])
            _write_zip(temp_name, archived)
            # Archived files are deleted once safely stored in the zip.
            for name in archived:
                os.remove(name)
            shutil.move(temp_name, destination)

        for name in leftovers:
            shutil.move(name, '../galaxy_outputs')
    else:
        # No compression: hand every file over as is.
        for name in list_files:
            shutil.move(name, '../galaxy_outputs')


def _stage_inputs(config_path):
    """Stage the files described by *config_path* under ``inputs`` and return the table.

    Runs, in order, the four helpers the script obtains from its star
    imports. Their implementation is not visible here; per the original
    notes they parse the config file, extract compressed entries (updating
    the table), ensure a ``.sam`` extension, and create symlinks.
    Iterating the returned table yields the entry names relative to
    ``inputs``.
    """
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_path)
    extract_compress_files_from_tabfiles(tab_files, 'inputs')
    check_sam_extension_and_add(tab_files, 'inputs')
    create_symlinks_from_tabfiles(tab_files, 'inputs')
    return tab_files


def _write_zip(archive_path, members):
    """Create the zip *archive_path* holding *members*, and close it.

    The archive is always closed before returning, so that its central
    directory is on disk before the caller moves the file elsewhere.
    """
    archive = zipfile.ZipFile(archive_path, 'w', allowZip64=True)
    try:
        for member in members:
            archive.write(member)
    finally:
        archive.close()


def _to_text(data):
    """Return subprocess output as ``str`` (bytes are decoded on Python 3)."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')
|
|
253
|
|
254
|
|
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()
|
|
257
|
|
258
|