STACKS_refmap.py @ 0:d6ba40f6c824 (repos/cmonjeau/stacks)
commit message: first commit
author: cmonjeau
date: Mon, 24 Aug 2015 09:29:12 +0000
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import optparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *

def __main__():

    # parse command-line arguments

    parser = optparse.OptionParser()
    parser.add_option('-p')
    parser.add_option('-r')
    parser.add_option('-s')
    parser.add_option('-O')
    parser.add_option('-n')
    parser.add_option('-m')
    parser.add_option('--bound_low')
    parser.add_option('--bound_high')
    parser.add_option('--alpha')
    parser.add_option('--logfile')
    parser.add_option('--compress_output')
    parser.add_option('--catalogsnps')
    parser.add_option('--catalogalleles')
    parser.add_option('--catalogtags')

    # additional outputs

    parser.add_option('--total_output')
    parser.add_option('--tags_output')
    parser.add_option('--snps_output')
    parser.add_option('--alleles_output')
    parser.add_option('--matches_output')
    (options, args) = parser.parse_args()

    # create working directories

    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    cmd_line = []
    cmd_line.append('ref_map.pl')

    # if a genetic map was provided: parent files (-p)

    if options.p:

        # parse the Galaxy config file into a table of input files

        tab_parent_files = galaxy_config_to_tabfiles_for_STACKS(options.p)

        # extract any compressed files listed in the table and update its entries

        extract_compress_files_from_tabfiles(tab_parent_files, 'inputs')

        # check file extensions (ref_map.pl expects .sam files)

        check_sam_extension_and_add(tab_parent_files, 'inputs')

        # create symlinks inside the inputs directory

        create_symlinks_from_tabfiles(tab_parent_files, 'inputs')

        # build the input part of the command line

        for key in tab_parent_files:

            # only keep regular files (skip directories created by decompression)

            if os.path.isfile('inputs/' + key):
                cmd_line.extend(['-p', os.path.normpath('inputs/' + key)])

    # if a genetic map was provided: progeny files (-r)

    if options.r:

        # parse the Galaxy config file into a table of input files

        tab_progeny_files = galaxy_config_to_tabfiles_for_STACKS(options.r)

        # extract any compressed files listed in the table and update its entries

        extract_compress_files_from_tabfiles(tab_progeny_files, 'inputs')

        # check file extensions (ref_map.pl expects .sam files)

        check_sam_extension_and_add(tab_progeny_files, 'inputs')

        # create symlinks inside the inputs directory

        create_symlinks_from_tabfiles(tab_progeny_files, 'inputs')

        for key in tab_progeny_files:

            # only keep regular files (skip directories created by decompression)

            if os.path.isfile('inputs/' + key):
                cmd_line.extend(['-r', 'inputs/' + key])

    # if individual sample files were selected (-s), parse the config file and create symlinks

    if options.s:

        # parse the Galaxy config file into a table of input files

        tab_individual_files = galaxy_config_to_tabfiles_for_STACKS(options.s)

        # extract any compressed files listed in the table and update its entries

        extract_compress_files_from_tabfiles(tab_individual_files, 'inputs')

        # check file extensions (ref_map.pl expects .sam files)

        check_sam_extension_and_add(tab_individual_files, 'inputs')

        # create symlinks inside the inputs directory

        create_symlinks_from_tabfiles(tab_individual_files, 'inputs')

        # build the input part of the command line

        for key in tab_individual_files:
            cmd_line.extend(['-s', 'inputs/' + key])

    # add the fixed parameters and the user-supplied options to the command line

    cmd_line.extend([
        '-S',
        '-b', '1',
        '-T', '4',
        '-o', 'job_outputs',
        '-n', options.n,
        '-m', options.m,
    ])

    if options.O:
        cmd_line.extend(['-O', options.O])

    if options.bound_low:
        cmd_line.extend(['--bound_low', options.bound_low])

    if options.bound_high:
        cmd_line.extend(['--bound_high', options.bound_high])

    if options.alpha:
        cmd_line.extend(['--alpha', options.alpha])

    # execute the job

    print '[COMMAND LINE] ' + ' '.join(cmd_line)

    p = subprocess.Popen(cmd_line, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    (stdoutput, stderror) = p.communicate()

    print stdoutput
    print stderror

    # post-processing: return the ref_map.pl log as the Galaxy log output

    try:
        shutil.move('job_outputs/ref_map.log', options.logfile)
    except Exception:
        sys.stderr.write('Error in ref_map execution; please read the additional output (stdout)\n')

    # move into the outputs directory

    os.chdir('job_outputs')

    # copy the catalog files to their dedicated Galaxy outputs

    for i in glob.glob('*'):
        if re.search('catalog\.snps\.tsv$', i):
            shutil.copy(i, options.catalogsnps)
        if re.search('catalog\.alleles\.tsv$', i):
            shutil.copy(i, options.catalogalleles)
        if re.search('catalog\.tags\.tsv$', i):
            shutil.copy(i, options.catalogtags)

    # list all remaining output files

    list_files = glob.glob('*')

    # if the requested compression mode is 'total', put everything into one archive

    if options.compress_output == 'total':

        mytotalzipfile = zipfile.ZipFile('total.zip.temp', 'w',
                                         allowZip64=True)

        for i in list_files:

            mytotalzipfile.write(os.path.basename(i))

        # close the archive (this writes its central directory), then return it

        mytotalzipfile.close()
        shutil.move('total.zip.temp', options.total_output)
    elif options.compress_output == 'categories':

        # if the requested compression mode is 'categories', build one archive per file type

        mytagszip = zipfile.ZipFile('tags.zip.temp', 'w', allowZip64=True)
        mysnpszip = zipfile.ZipFile('snps.zip.temp', 'w', allowZip64=True)
        myalleleszip = zipfile.ZipFile('alleles.zip.temp', 'w', allowZip64=True)
        mymatcheszip = zipfile.ZipFile('matches.zip.temp', 'w', allowZip64=True)

        for i in list_files:

            # dispatch each file to the archive matching its type

            if re.search("tags\.tsv$", i) and not re.search('batch', i):
                mytagszip.write(os.path.basename(i))
                os.remove(i)
            elif re.search("snps\.tsv$", i) and not re.search('batch', i):
                mysnpszip.write(os.path.basename(i))
                os.remove(i)
            elif re.search("alleles\.tsv$", i) and not re.search('batch', i):
                myalleleszip.write(os.path.basename(i))
                os.remove(i)
            elif re.search("matches\.tsv$", i) and not re.search('batch', i):
                mymatcheszip.write(os.path.basename(i))
                os.remove(i)
            else:
                shutil.move(os.path.basename(i), '../galaxy_outputs')

        # close the archives (this writes their central directories), then return them

        mytagszip.close()
        mysnpszip.close()
        myalleleszip.close()
        mymatcheszip.close()

        shutil.move('tags.zip.temp', options.tags_output)
        shutil.move('snps.zip.temp', options.snps_output)
        shutil.move('alleles.zip.temp', options.alleles_output)
        shutil.move('matches.zip.temp', options.matches_output)
    else:

        # no compression requested: move every remaining file to the Galaxy outputs

        for i in list_files:
            shutil.move(os.path.basename(i), '../galaxy_outputs')


if __name__ == '__main__':
    __main__()
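
# Hypothetical example invocation (illustration only; the option values and
# file names below are made up. In practice the Galaxy tool fills them in at
# runtime, and -p/-r/-s point to Galaxy-generated config files listing the
# SAM inputs):
#
#   python STACKS_refmap.py -p parents.config -r progeny.config \
#       -n 2 -m 3 --compress_output categories --logfile ref_map.log \
#       --catalogsnps catalog.snps.tsv --catalogalleles catalog.alleles.tsv \
#       --catalogtags catalog.tags.tsv --tags_output tags.zip \
#       --snps_output snps.zip --alleles_output alleles.zip \
#       --matches_output matches.zip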