comparison STACKS_denovomap.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 import os
7 import tempfile
8 import shutil
9 import subprocess
10 import glob
11 import argparse
12 from os.path import basename
13 import zipfile
14 import tarfile
15 import gzip
16 from galaxy.datatypes.checkers import *
17 from stacks import *
18
19
def _stage_inputs(config_path, input_dir='inputs'):
    """Stage the datasets listed in a Galaxy config file into *input_dir*.

    Applies, in order, the four helpers that every input category
    (parents, progeny, population individuals) goes through.  The helpers
    come from the project's ``stacks`` module; their descriptions below
    are taken from the comments of the original script.

    :param config_path: path of the Galaxy-generated config file.
    :param input_dir: directory receiving the staged files.
    :returns: the table produced by ``galaxy_config_to_tabfiles_for_STACKS``
        (iterated by callers to obtain file names relative to *input_dir*).
    """
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_path)
    # check if zipped files are in the table and update the table content
    extract_compress_files_from_tabfiles(tab_files, input_dir)
    # check file extensions (important to have .fq or .fasta files)
    check_fastq_extension_and_add(tab_files, input_dir)
    # create symlinks into the input directory
    create_symlinks_from_tabfiles(tab_files, input_dir)
    return tab_files


def _input_file_args(flag, tab_files, input_dir='inputs'):
    """Return ``[flag, path, flag, path, ...]`` for each staged regular file.

    Entries of *tab_files* that are not regular files under *input_dir*
    (e.g. directories created by a decompression) are skipped.
    """
    args = []
    for key in tab_files:
        path = input_dir + '/' + key
        if os.path.isfile(path):
            args.extend([flag, path])
    return args


def _write_zip(archive_path, member_names):
    """Create *archive_path* containing *member_names* and close it.

    The archive is closed (by the ``with`` block) before returning so that
    the zip central directory is on disk before the caller moves the file.
    """
    with zipfile.ZipFile(archive_path, 'w', allowZip64=True) as archive:
        for name in member_names:
            archive.write(name)


def __main__():
    """Galaxy wrapper around the Stacks ``denovo_map.pl`` pipeline.

    Parses the wrapper's command-line options, stages the input reads in
    ``inputs/``, builds and runs the ``denovo_map.pl`` command with
    ``job_outputs/`` as output directory, then dispatches the results:

    * ``job_outputs/denovo_map.log`` is moved to ``--logfile``; if that
      fails the run is considered failed and the process exits with 1;
    * catalog snps/alleles/tags files are copied to ``--catalogsnps``,
      ``--catalogalleles`` and ``--catalogtags``;
    * depending on ``--compress_output``: ``total`` zips every output
      file into ``--total_output``; ``categories`` zips the per-sample
      tags/snps/alleles/matches files into their respective
      ``--*_output`` archives and moves everything else to
      ``galaxy_outputs/``; any other value moves all files to
      ``galaxy_outputs/`` uncompressed.

    Side effects: creates ``inputs/``, ``job_outputs/`` and
    ``galaxy_outputs/`` in the current directory and changes the working
    directory to ``job_outputs/``.
    """
    # --- argument parsing -------------------------------------------------
    parser = argparse.ArgumentParser()
    for option_name in (
        '-p', '-b', '-r', '-s', '-O', '-m', '-P', '-M', '-N', '-n',
        '-t', '-H',
        '--bound_low', '--bound_high', '--alpha',
        '--logfile', '--compress_output',
        '--catalogsnps', '--catalogalleles', '--catalogtags',
        # additional outputs
        '--total_output', '--tags_output', '--snps_output',
        '--alleles_output', '--matches_output',
    ):
        parser.add_argument(option_name)
    options = parser.parse_args()

    # --- working directories ----------------------------------------------
    for directory in ('inputs', 'job_outputs', 'galaxy_outputs'):
        os.mkdir(directory)

    cmd_line = ['denovo_map.pl']

    # --- genetic map: parent files -----------------------------------------
    if options.p:
        _stage_inputs(options.p)

        # every regular file found in the input dir (directories created
        # by a decompression are skipped), in sorted order
        fastq_files = sorted(
            path for path in glob.glob('inputs/*') if os.path.isfile(path)
        )

        if options.b == 'true':
            # paired-end: only every other file of the sorted list is passed
            # (presumably the first read of each pair -- confirm naming)
            selected = fastq_files[::2]
        else:
            selected = fastq_files
        for path in selected:
            cmd_line.extend(['-p', path])

    # --- genetic map: progeny files ----------------------------------------
    if options.r:
        cmd_line.extend(_input_file_args('-r', _stage_inputs(options.r)))

    # --- population: individual files ----------------------------------------
    if options.s:
        cmd_line.extend(_input_file_args('-s', _stage_inputs(options.s)))

    # --- fixed part of the command line --------------------------------------
    cmd_line.extend(['-S', '-b', '1', '-T', '4', '-o', 'job_outputs/'])

    if options.O:
        cmd_line.extend(['-O', options.O])

    # valued options: '-1' is the wrapper's "not set" marker
    for flag in ('m', 'P', 'M', 'N', 'n'):
        value = getattr(options, flag)
        if value and value != '-1':
            cmd_line.extend(['-' + flag, value])

    # boolean switches, enabled by the literal string 'true'
    for flag in ('t', 'H'):
        if getattr(options, flag) == 'true':
            cmd_line.append('-' + flag)

    # SNP model
    if options.bound_low:
        cmd_line.extend(['--bound_low', options.bound_low])
        # only forward the upper bound when it was actually supplied; a
        # None entry would break both the trace below and the subprocess
        if options.bound_high:
            cmd_line.extend(['--bound_high', options.bound_high])

    if options.alpha:
        cmd_line.extend(['--alpha', options.alpha])

    # --- run -------------------------------------------------------------------
    print("[CMD_LINE] : " + ' '.join(cmd_line))
    # flush so the trace appears before the child's own output
    sys.stdout.flush()

    return_code = subprocess.call(cmd_line)

    # --- post-processing ---------------------------------------------------------
    # the presence of the log file is what decides success or failure
    try:
        shutil.move('job_outputs/denovo_map.log', options.logfile)
    except (IOError, OSError, shutil.Error):
        sys.stderr.write('Error in denovo_map execution; Please read the additional output (stdout)\n')
        sys.stderr.write('denovo_map.pl exit status: %d\n' % return_code)
        sys.exit(1)

    # go inside the outputs dir
    os.chdir('job_outputs')

    # copy the catalog files to their dedicated outputs
    catalog_targets = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for name in glob.glob('*'):
        for suffix, destination in catalog_targets:
            if destination and name.endswith(suffix):
                shutil.copy(name, destination)

    # snapshot taken before any archive is created, so the temporary
    # archives never end up inside themselves
    list_files = glob.glob('*')

    if options.compress_output == 'total':
        # one archive holding every output file
        _write_zip('total.zip.temp', list_files)
        shutil.move('total.zip.temp', options.total_output)

    elif options.compress_output == 'categories':
        # one archive per file category; 'batch' (catalog) files and
        # anything else go uncompressed to galaxy_outputs
        categories = ('tags', 'snps', 'alleles', 'matches')
        grouped = dict((category, []) for category in categories)

        for name in list_files:
            matched = None
            if 'batch' not in name:
                for category in categories:
                    if name.endswith(category + '.tsv'):
                        matched = category
                        break
            if matched is None:
                shutil.move(name, '../galaxy_outputs')
            else:
                grouped[matched].append(name)

        for category in categories:
            temp_archive = category + '.zip.temp'
            _write_zip(temp_archive, grouped[category])
            # archived files are not kept alongside the archive
            for name in grouped[category]:
                os.remove(name)
            shutil.move(temp_archive, getattr(options, category + '_output'))

    else:
        # no compression
        for name in list_files:
            shutil.move(name, '../galaxy_outputs')
260
261
# Run the wrapper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()
264
265