comparison STACKS_population.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 import re
6 import os
7 import tempfile
8 import shutil
9 import subprocess
10 import glob
11 import argparse
12 from os.path import basename
13 import zipfile
14 import tarfile
15 import gzip
16 from galaxy.datatypes.checkers import *
17 from stacks import *
18
19
# Ordered (pattern, temporary archive name) rules used to sort the files left
# in the working directory; the first rule whose pattern matches wins.
_ARCHIVE_RULES = (
    (re.compile(r"\.markers$"), 'markers.zip.temp'),
    (re.compile(r"phase\.inp$"), 'phase.zip.temp'),
    (re.compile(r"unphased\.bgl$"), 'unphased.zip.temp'),
    (re.compile(r"fst"), 'fst.zip.temp'),
)


def _build_parser():
    """Return the argument parser declaring every option this script accepts."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-P')
    parser.add_argument('-M')
    parser.add_argument('-b')
    parser.add_argument('--vcf', action='store_true')
    parser.add_argument('--genepop', action='store_true')
    parser.add_argument('--structure', action='store_true')
    parser.add_argument('-e')
    parser.add_argument('--genomic', action='store_true')
    parser.add_argument('--fasta', action='store_true')
    parser.add_argument('--phase', action='store_true')
    parser.add_argument('--beagle', action='store_true')
    parser.add_argument('--plink', action='store_true')
    parser.add_argument('--phylip', action='store_true')
    parser.add_argument('--phylip_var', action='store_true')
    parser.add_argument('--write_single_snp', action='store_true')
    parser.add_argument('-k', action='store_true')

    # advanced options
    parser.add_argument('--advanced_options_activate')
    parser.add_argument('-B')
    parser.add_argument('-W')
    parser.add_argument('-r')
    parser.add_argument('-p')
    parser.add_argument('-m')
    parser.add_argument('-a')
    parser.add_argument('-f')
    parser.add_argument('--p_value_cutoff')
    parser.add_argument('--window_size')
    parser.add_argument('--bootstrap')
    parser.add_argument('--bootstrap_reps')

    # multifile management
    parser.add_argument('--logfile')

    # main outputs
    parser.add_argument('--ss')
    parser.add_argument('--s')

    # optional outputs
    parser.add_argument('--ov')
    parser.add_argument('--op')
    parser.add_argument('--ol')
    parser.add_argument('--of')
    parser.add_argument('--os')
    parser.add_argument('--oe')
    parser.add_argument('--om')
    parser.add_argument('--og')

    # archive outputs
    parser.add_argument('--unphased_output')
    parser.add_argument('--markers_output')
    parser.add_argument('--phase_output')
    parser.add_argument('--fst_output')
    return parser


def _add_option(cmd_line, flag, value):
    """Append ``flag value`` to *cmd_line* unless *value* is None or empty.

    argparse leaves unset options at None; putting None into the argument
    list would make both ' '.join() and subprocess.call() raise TypeError.
    """
    if value:
        cmd_line.extend([flag, value])


def _build_command(options, stacks_dir):
    """Return the ``populations`` argument list built from *options*.

    *stacks_dir* is passed to ``populations`` as its ``-P`` directory.
    """
    cmd_line = ['populations']
    _add_option(cmd_line, '-b', options.b)
    cmd_line.extend(['-P', stacks_dir])
    _add_option(cmd_line, '-M', options.M)

    if options.e:
        cmd_line.extend(['-e', options.e])
        # options.genomic is a boolean: emit the flag itself, never the
        # boolean (a non-string item breaks the argument list).
        if options.genomic:
            cmd_line.append('--genomic')

    # output format flags
    for flag in ('vcf', 'genepop', 'structure', 'fasta', 'phase', 'beagle',
                 'plink', 'phylip'):
        if getattr(options, flag):
            cmd_line.append('--' + flag)
    if options.phylip_var and options.phylip:
        cmd_line.append('--phylip_var')
    if options.write_single_snp and (options.genepop or options.structure):
        cmd_line.append('--write_single_snp')

    if options.k:
        cmd_line.append('-k')
        _add_option(cmd_line, '--window_size', options.window_size)

    if options.advanced_options_activate == 'true':
        for flag in ('B', 'W', 'r', 'p', 'm', 'a'):
            _add_option(cmd_line, '-' + flag, getattr(options, flag))

    if options.f:
        cmd_line.extend(['-f', options.f])
        _add_option(cmd_line, '--p_value_cutoff', options.p_value_cutoff)
    if options.bootstrap:
        cmd_line.extend(['--bootstrap', options.bootstrap])
        _add_option(cmd_line, '--bootstrap_reps', options.bootstrap_reps)

    return cmd_line


def _move_first_match(pattern, destination):
    """Move the first file matching glob *pattern* to *destination*.

    Returns True on success and False when nothing matches, no destination
    was given, or the move itself fails.
    """
    matches = glob.glob(pattern)
    if not matches or not destination:
        return False
    try:
        shutil.move(matches[0], destination)
    except (IOError, OSError, shutil.Error):
        return False
    return True


def _is_plain_output(file_name):
    """Tell whether *file_name* is returned as-is in ``galaxy_outputs``.

    True for names starting with ``batch`` that are not ``.tsv`` files, and
    for any name ending in ``_<digits>.tsv``.
    """
    base = os.path.basename(file_name)
    batch_non_tsv = (re.search(r'^batch', base)
                     and not re.search(r"\.tsv$", base))
    return bool(batch_non_tsv or re.search(r".*_[0-9]*\.tsv$", base))


def _dispatch_remaining_files(file_names):
    """Zip or move every entry of *file_names* according to its name.

    Files matching one of ``_ARCHIVE_RULES`` are added to the corresponding
    temporary zip archive in the current directory; other files accepted by
    ``_is_plain_output`` are moved to ``../galaxy_outputs``. All four
    archives are created even when they end up empty, and they are closed
    even if an error interrupts the processing.
    """
    archives = {}
    try:
        for _, archive_name in _ARCHIVE_RULES:
            archives[archive_name] = zipfile.ZipFile(archive_name, 'w',
                                                     allowZip64=True)
        for file_name in file_names:
            for pattern, archive_name in _ARCHIVE_RULES:
                if pattern.search(file_name):
                    archives[archive_name].write(file_name)
                    break
            else:
                if _is_plain_output(file_name):
                    shutil.move(file_name, '../galaxy_outputs')
    finally:
        for archive in archives.values():
            archive.close()


def __main__():
    """Run ``populations`` on the provided Stacks data and collect its outputs.

    Steps: parse the command line, create the ``job_outputs`` and
    ``galaxy_outputs`` directories, unpack the ``-P`` input into
    ``job_outputs``, run ``populations`` there, then copy/move the produced
    files to the destinations given on the command line. Remaining files are
    either zipped by type or moved to ``galaxy_outputs``.

    Exits with status 1 when the ``populations`` log file cannot be copied,
    which is how a failed run is detected.
    """
    options = _build_parser().parse_args()

    # create the working directories
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
    os.chdir('job_outputs')

    # Unpack the Stacks input into the working directory.
    # NOTE(review): extract_compress_files comes from the `stacks` module,
    # which is not visible here; presumably it handles compressed inputs.
    extract_compress_files(options.P, os.getcwd())

    cmd_line = _build_command(options, os.getcwd())
    print("[CMD]:" + ' '.join(cmd_line))
    # Flush so the command line is reported before the tool's own output.
    sys.stdout.flush()
    subprocess.call(cmd_line)

    # The log file is named after the batch id given with -b.
    log_name = 'batch_%s.populations.log' % options.b
    log_copied = False
    if options.logfile:
        try:
            shutil.copy(log_name, options.logfile)
            log_copied = True
        except (IOError, OSError, shutil.Error):
            pass
    if not log_copied:
        sys.stderr.write('Error in population execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    if not _move_first_match('*.sumstats_summary.tsv', options.ss):
        print("No sumstats summary file")
    if not _move_first_match('*.sumstats.tsv', options.s):
        print("No sumstats file")

    # move additional output files
    if options.vcf and not _move_first_match('*.vcf', options.ov):
        print("No VCF files")

    # For paired outputs the second file is only attempted when the first
    # one was moved successfully.
    if options.phylip and not (
            _move_first_match('*.phylip', options.op)
            and _move_first_match('*.phylip.log', options.ol)):
        print("No phylip file")

    if options.fasta and not _move_first_match('*.fa', options.of):
        print("No fasta files")

    if options.structure and not _move_first_match('*.structure.tsv',
                                                   options.os):
        print("No structure file")

    if options.plink and not (
            _move_first_match('*.ped', options.oe)
            and _move_first_match('*.map', options.om)):
        print("No ped and map file")

    if options.genepop and not _move_first_match('*.genepop', options.og):
        print("No genepop file")

    # List the leftovers BEFORE the temporary archives are created so the
    # archives never try to include themselves.
    list_files = glob.glob('*')
    _dispatch_remaining_files(list_files)

    # return archives
    shutil.move('fst.zip.temp', options.fst_output)
    if options.beagle:
        shutil.move('markers.zip.temp', options.markers_output)
        shutil.move('unphased.zip.temp', options.unphased_output)
    if options.phase:
        shutil.move('phase.zip.temp', options.phase_output)
237
238
# Run the wrapper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    __main__()
241
242
243