STACKS_denovomap.py @ changeset 0:d6ba40f6c824 (repository: cmonjeau/stacks)

commit message: first commit
author:         cmonjeau
date:           Mon, 24 Aug 2015 09:29:12 +0000
parents:        (none)
children:       (none)
#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import argparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


def __main__():

    # retrieve the arguments

    parser = argparse.ArgumentParser()
    parser.add_argument('-p')
    parser.add_argument('-b')
    parser.add_argument('-r')
    parser.add_argument('-s')
    parser.add_argument('-O')
    parser.add_argument('-m')
    parser.add_argument('-P')
    parser.add_argument('-M')
    parser.add_argument('-N')
    parser.add_argument('-n')
    parser.add_argument('-t')
    parser.add_argument('-H')
    parser.add_argument('--bound_low')
    parser.add_argument('--bound_high')
    parser.add_argument('--alpha')
    parser.add_argument('--logfile')
    parser.add_argument('--compress_output')
    parser.add_argument('--catalogsnps')
    parser.add_argument('--catalogalleles')
    parser.add_argument('--catalogtags')

    # additional outputs
    parser.add_argument('--total_output')
    parser.add_argument('--tags_output')
    parser.add_argument('--snps_output')
    parser.add_argument('--alleles_output')
    parser.add_argument('--matches_output')

    options = parser.parse_args()
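    # Note on the options above: -p, -r and -s take Galaxy config files listing
    # parent, progeny and sample read files, and -b here is a 'true'/'false'
    # paired-end flag (distinct from the denovo_map.pl batch ID, which is
    # hard-coded to 1 below). The remaining single-letter flags are forwarded
    # verbatim to denovo_map.pl; their Stacks 1.x meanings (assumed from the
    # Stacks documentation, not stated in this script) are roughly: -m minimum
    # stack depth, -M mismatches allowed between stacks within an individual,
    # -N mismatches allowed when aligning secondary reads, -n mismatches
    # allowed between catalog loci, -t remove highly repetitive stacks,
    # -H disable calling haplotypes from secondary reads.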

    # create working directories

    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
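    # Directory layout used below: 'inputs' holds symlinked/extracted read
    # files, 'job_outputs' holds the raw denovo_map.pl results, and
    # 'galaxy_outputs' holds the files handed back to Galaxy (presumably
    # collected by the tool's XML wrapper, which is not part of this script).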

    cmd_line = []
    cmd_line.append('denovo_map.pl')

    # if a genetic map is analysed (parent files provided)

    if options.p:

        # parse the config file listing the parent files

        tab_parent_files = galaxy_config_to_tabfiles_for_STACKS(options.p)

        # extract any compressed files listed in the tab and update the tab content

        extract_compress_files_from_tabfiles(tab_parent_files, 'inputs')

        # check file extensions (denovo_map.pl expects .fq or .fasta files)

        check_fastq_extension_and_add(tab_parent_files, 'inputs')

        # create symlinks in the inputs directory

        create_symlinks_from_tabfiles(tab_parent_files, 'inputs')

        # parse the inputs directory and store all file names in a list

        fastq_files = []
        for fastq_file in glob.glob('inputs/*'):
            # keep regular files only (skip directories created by decompression)
            if os.path.isfile(fastq_file):
                fastq_files.append(fastq_file)

        fastq_files.sort()

        # test if the fastq files are paired-end
        if options.b == 'true':
            for n in range(0, len(fastq_files), 2):
                cmd_line.extend(['-p', fastq_files[n]])
        else:
            for myfastqfile in fastq_files:
                cmd_line.extend(['-p', myfastqfile])
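
        # Note: when -b is 'true' the sorted file list is assumed to hold read
        # pairs side by side, so only the first file of each pair is passed to
        # denovo_map.pl with -p (this is an interpretation of the code above,
        # not documented in the original script).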

    # if a genetic map with progeny files

    if options.r:

        # parse the config file listing the progeny files
        tab_progeny_files = galaxy_config_to_tabfiles_for_STACKS(options.r)

        # extract any compressed files listed in the tab and update the tab content
        extract_compress_files_from_tabfiles(tab_progeny_files, 'inputs')

        # check file extensions (denovo_map.pl expects .fq or .fasta files)
        check_fastq_extension_and_add(tab_progeny_files, 'inputs')

        # create symlinks in the inputs directory
        create_symlinks_from_tabfiles(tab_progeny_files, 'inputs')

        for key in tab_progeny_files:

            # keep regular files only (skip directories created by decompression)

            if os.path.isfile('inputs/' + key):
                cmd_line.extend(['-r', 'inputs/' + key])

    # if the population mode is checked (sample files provided)
    if options.s:

        tab_individual_files = galaxy_config_to_tabfiles_for_STACKS(options.s)

        # extract any compressed files listed in the tab and update the tab content
        extract_compress_files_from_tabfiles(tab_individual_files, 'inputs')

        # check file extensions (denovo_map.pl expects .fq or .fasta files)
        check_fastq_extension_and_add(tab_individual_files, 'inputs')

        # create symlinks in the inputs directory
        create_symlinks_from_tabfiles(tab_individual_files, 'inputs')

        # add each sample file to the command line
        for key in tab_individual_files:

            # keep regular files only (skip directories created by decompression)
            if os.path.isfile('inputs/' + key):
                cmd_line.extend(['-s', 'inputs/' + key])

    # add the fixed part of the denovo_map.pl command line
    cmd_line.extend([
        '-S',
        '-b',
        '1',
        '-T',
        '4',
        '-o',
        'job_outputs/'
        ])

    if options.O:
        cmd_line.extend(['-O', options.O])

    if options.m and options.m != '-1':
        cmd_line.extend(['-m', options.m])

    if options.P and options.P != '-1':
        cmd_line.extend(['-P', options.P])

    if options.M and options.M != '-1':
        cmd_line.extend(['-M', options.M])

    if options.N and options.N != '-1':
        cmd_line.extend(['-N', options.N])

    if options.n and options.n != '-1':
        cmd_line.extend(['-n', options.n])

    if options.t and options.t == 'true':
        cmd_line.append('-t')

    if options.H and options.H == 'true':
        cmd_line.append('-H')

    # SNP model options
    if options.bound_low:
        cmd_line.extend(['--bound_low', options.bound_low])
        cmd_line.extend(['--bound_high', options.bound_high])

    if options.alpha:
        cmd_line.extend(['--alpha', options.alpha])
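
    # For illustration only (file names and parameter values below are
    # hypothetical), the assembled command typically looks like:
    #   denovo_map.pl -p inputs/parent1.fq -p inputs/parent2.fq \
    #       -r inputs/progeny1.fq -S -b 1 -T 4 -o job_outputs/ -m 3 -M 2 -n 1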

    # launch the command line
    print "[CMD_LINE] : " + ' '.join(cmd_line)

    p = subprocess.call(cmd_line)

    # post-processing
    try:
        shutil.move('job_outputs/denovo_map.log', options.logfile)
    except:
        sys.stderr.write('Error in denovo_map execution; please read the additional output (stdout)\n')
        sys.exit(1)

    # go inside the outputs dir
    os.chdir('job_outputs')

    # copy the catalog files to their dedicated Galaxy outputs
    for i in glob.glob('*'):
        if re.search('catalog.snps.tsv$', i):
            shutil.copy(i, options.catalogsnps)
        if re.search('catalog.alleles.tsv$', i):
            shutil.copy(i, options.catalogalleles)
        if re.search('catalog.tags.tsv$', i):
            shutil.copy(i, options.catalogtags)
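
    # In Stacks 1.x the catalog files written by denovo_map.pl are normally
    # named batch_<ID>.catalog.tags.tsv, .snps.tsv and .alleles.tsv (batch 1
    # here); this naming is assumed from the Stacks documentation rather than
    # stated in this script.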

    list_files = glob.glob('*')

    # if the compress output mode is 'total'
    if options.compress_output == 'total':

        mytotalzipfile = zipfile.ZipFile('total.zip.temp', 'w',
                                         allowZip64=True)

        for i in list_files:
            mytotalzipfile.write(os.path.basename(i))

        # close the archive so its central directory is written to disk
        mytotalzipfile.close()

        # return the unique archive
        shutil.move('total.zip.temp', options.total_output)
    elif options.compress_output == 'categories':

        # if the compress output mode is by categories
        mytagszip = zipfile.ZipFile('tags.zip.temp', 'w', allowZip64=True)
        mysnpszip = zipfile.ZipFile('snps.zip.temp', 'w', allowZip64=True)
        myalleleszip = zipfile.ZipFile('alleles.zip.temp', 'w', allowZip64=True)
        mymatcheszip = zipfile.ZipFile('matches.zip.temp', 'w', allowZip64=True)

        for i in list_files:
            # dispatch each file into the archive matching its type
            if re.search("tags\.tsv$", i) and not re.search('batch', i):
                mytagszip.write(os.path.basename(i))
                os.remove(i)
            elif re.search("snps\.tsv$", i) and not re.search('batch', i):
                mysnpszip.write(os.path.basename(i))
                os.remove(i)
            elif re.search("alleles\.tsv$", i) and not re.search('batch', i):
                myalleleszip.write(os.path.basename(i))
                os.remove(i)
            elif re.search("matches\.tsv$", i) and not re.search('batch', i):
                mymatcheszip.write(os.path.basename(i))
                os.remove(i)
            else:
                shutil.move(os.path.basename(i), '../galaxy_outputs')

        # close the archives so their central directories are written to disk
        mytagszip.close()
        mysnpszip.close()
        myalleleszip.close()
        mymatcheszip.close()

        # return the archives
        shutil.move('tags.zip.temp', options.tags_output)
        shutil.move('snps.zip.temp', options.snps_output)
        shutil.move('alleles.zip.temp', options.alleles_output)
        shutil.move('matches.zip.temp', options.matches_output)
    else:
        # no compression: hand the files back to Galaxy as they are
        for i in list_files:
            shutil.move(os.path.basename(i), '../galaxy_outputs')


if __name__ == '__main__':
    __main__()
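
# Example invocation (illustrative only; all file names and parameter values
# below are hypothetical, and in practice Galaxy fills them in from the tool
# wrapper):
#   python STACKS_denovomap.py -p parents.config -r progeny.config -b false \
#       -m 3 -M 2 -n 1 --compress_output total --total_output total.zip \
#       --logfile denovo_map.log --catalogsnps catalog.snps.tsv \
#       --catalogalleles catalog.alleles.tsv --catalogtags catalog.tags.tsv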