0
|
1 #!/usr/bin/python
|
|
2 # -*- coding: utf-8 -*-
|
|
3
|
|
4 import sys
|
|
5 import re
|
|
6 import os
|
|
7 import tempfile
|
|
8 import shutil
|
|
9 import subprocess
|
|
10 import glob
|
|
11 import argparse
|
|
12 from os.path import basename
|
|
13 import zipfile
|
|
14 import tarfile
|
|
15 import gzip
|
|
16 from galaxy.datatypes.checkers import *
|
|
17 from stacks import *
|
|
18
|
|
19
|
|
def _stage_input_files(config_file, input_dir='inputs'):
    """Prepare the files listed in a Galaxy config file inside *input_dir*.

    Calls, in the wrapper's established order, the four preparation helpers
    that this script uses but does not define (presumably provided by the
    star-imported ``stacks`` module -- confirm).  The step descriptions
    below come from the original inline comments of this script.

    :param config_file: path of the Galaxy-generated config file.
    :param input_dir: directory receiving the staged files.
    :returns: the object returned by ``galaxy_config_to_tabfiles_for_STACKS``;
        this script only iterates over it to obtain file names.
    """
    # parse the config file
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_file)
    # check whether compressed files are listed and update the table content
    extract_compress_files_from_tabfiles(tab_files, input_dir)
    # check file extensions (important to have .fq or .fasta files)
    check_fastq_extension_and_add(tab_files, input_dir)
    # create symlinks in the input directory
    create_symlinks_from_tabfiles(tab_files, input_dir)
    return tab_files


def _listed_file_args(flag, tab_files, input_dir='inputs'):
    """Return ``[flag, path, flag, path, ...]`` for each staged regular file.

    Entries of *tab_files* that are not regular files inside *input_dir*
    (e.g. directories created by a decompression) are skipped.
    """
    args = []
    for key in tab_files:
        path = input_dir + '/' + key
        if os.path.isfile(path):
            args.extend([flag, path])
    return args


def _parent_file_args(paired_end, input_dir='inputs'):
    """Return the ``-p`` arguments for the regular files found in *input_dir*.

    Files are taken in sorted order.  When *paired_end* is true only every
    second file (indexes 0, 2, 4, ...) is passed to the tool.
    """
    fastq_files = sorted(
        path for path in glob.glob(input_dir + '/*') if os.path.isfile(path)
    )
    if paired_end:
        fastq_files = fastq_files[::2]
    args = []
    for path in fastq_files:
        args.extend(['-p', path])
    return args


def _tuning_args(options):
    """Translate the optional tuning parameters into ``denovo_map.pl`` flags."""
    args = []
    if options.O:
        args.extend(['-O', options.O])

    # valued options: '-1' is the sentinel for "not set"
    for flag in ('m', 'P', 'M', 'N', 'n'):
        value = getattr(options, flag)
        if value and value != '-1':
            args.extend(['-' + flag, value])

    # boolean switches are received as the string 'true'
    for flag in ('t', 'H'):
        if getattr(options, flag) == 'true':
            args.append('-' + flag)

    # SNP model
    if options.bound_low:
        args.extend(['--bound_low', options.bound_low])
        # Only forward the upper bound when it was actually supplied: a None
        # entry in the argument list makes subprocess raise TypeError.
        if options.bound_high:
            args.extend(['--bound_high', options.bound_high])
    if options.alpha:
        args.extend(['--alpha', options.alpha])
    return args


def _export_catalog_files(file_names, options):
    """Copy the catalog snps/alleles/tags files to their Galaxy datasets."""
    destinations = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for name in file_names:
        for suffix, destination in destinations:
            if name.endswith(suffix):
                shutil.copy(name, destination)


def _archive_everything(file_names, destination):
    """Zip every entry of *file_names* into one archive moved to *destination*."""
    # The archive must be closed before it is moved: the zip central
    # directory is only written on close().
    with zipfile.ZipFile('total.zip.temp', 'w', allowZip64=True) as archive:
        for name in file_names:
            archive.write(name)
    shutil.move('total.zip.temp', destination)


def _archive_by_category(file_names, options, leftover_dir='../galaxy_outputs'):
    """Zip tags/snps/alleles/matches files separately; move the rest away.

    Files whose name contains ``batch`` are never archived; they are moved
    to *leftover_dir* together with every file matching no category.
    Archived files are deleted from the current directory.
    """
    categories = (
        ('tags.tsv', 'tags.zip.temp', options.tags_output),
        ('snps.tsv', 'snps.zip.temp', options.snps_output),
        ('alleles.tsv', 'alleles.zip.temp', options.alleles_output),
        ('matches.tsv', 'matches.zip.temp', options.matches_output),
    )
    remaining = list(file_names)
    for suffix, temp_name, destination in categories:
        selected = [
            name for name in remaining
            if name.endswith(suffix) and 'batch' not in name
        ]
        # Close each archive (writes the central directory) before moving it.
        with zipfile.ZipFile(temp_name, 'w', allowZip64=True) as archive:
            for name in selected:
                archive.write(name)
                os.remove(name)
                remaining.remove(name)
        shutil.move(temp_name, destination)

    for name in remaining:
        shutil.move(name, leftover_dir)


def __main__():
    """Run ``denovo_map.pl`` for Galaxy and dispatch its output files.

    Steps:
      1. parse the command-line options;
      2. create the ``inputs``, ``job_outputs`` and ``galaxy_outputs``
         working directories in the current directory (fails if they exist);
      3. stage the parent (``-p``), progeny (``-r``) and sample (``-s``)
         files and build the ``denovo_map.pl`` command line;
      4. run the tool and move its log to ``--logfile``;
      5. copy the catalog files to their datasets, then, depending on
         ``--compress_output``, zip everything (``total``), zip by file type
         (``categories``) or move all files to ``galaxy_outputs``.

    Exits with status 1 (message on stderr) when the tool cannot be started
    or when its log file cannot be moved.
    """
    # --- command-line options ---------------------------------------------
    parser = argparse.ArgumentParser()
    for flag in ('-p', '-b', '-r', '-s', '-O', '-m', '-P', '-M', '-N', '-n',
                 '-t', '-H', '--bound_low', '--bound_high', '--alpha',
                 '--logfile', '--compress_output', '--catalogsnps',
                 '--catalogalleles', '--catalogtags'):
        parser.add_argument(flag)
    # additional outputs
    for flag in ('--total_output', '--tags_output', '--snps_output',
                 '--alleles_output', '--matches_output'):
        parser.add_argument(flag)
    options = parser.parse_args()

    # --- working directories ----------------------------------------------
    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    cmd_line = ['denovo_map.pl']

    # --- genetic map: parents, optionally with progeny ---------------------
    if options.p:
        _stage_input_files(options.p)
        cmd_line.extend(_parent_file_args(options.b == 'true'))

        # NOTE(review): the original indentation was lost; progeny handling
        # is assumed to be nested under the parents branch ("genetic map
        # with progeny files") -- confirm.
        if options.r:
            tab_progeny_files = _stage_input_files(options.r)
            cmd_line.extend(_listed_file_args('-r', tab_progeny_files))

    # --- population: individual samples -------------------------------------
    if options.s:
        tab_individual_files = _stage_input_files(options.s)
        cmd_line.extend(_listed_file_args('-s', tab_individual_files))

    # --- fixed and optional tool parameters ---------------------------------
    cmd_line.extend(['-S', '-b', '1', '-T', '4', '-o', 'job_outputs/'])
    cmd_line.extend(_tuning_args(options))

    # --- run the tool -------------------------------------------------------
    print("[CMD_LINE] : " + ' '.join(cmd_line))
    # flush so the command line precedes the tool's own output
    sys.stdout.flush()
    try:
        subprocess.call(cmd_line)
    except OSError as error:
        # e.g. denovo_map.pl is not installed or not on the PATH
        sys.stderr.write('Error: unable to run denovo_map.pl: %s\n' % error)
        sys.exit(1)

    # --- post-processing ----------------------------------------------------
    # A missing log file is how a failed run is detected.
    try:
        shutil.move('job_outputs/denovo_map.log', options.logfile)
    except Exception:
        sys.stderr.write('Error in denovo_map execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    # go inside the outputs dir
    os.chdir('job_outputs')
    list_files = glob.glob('*')

    _export_catalog_files(list_files, options)

    if options.compress_output == 'total':
        _archive_everything(list_files, options.total_output)
    elif options.compress_output == 'categories':
        _archive_by_category(list_files, options)
    else:
        # no compression
        for name in list_files:
            shutil.move(name, '../galaxy_outputs')
|
|
260
|
|
261
|
|
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()
|
|
264
|
|
265
|