annotate STACKS_genotypes.py @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
1 #!/usr/bin/python
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
2 # -*- coding: utf-8 -*-
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
3
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
4 import sys
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
5 import re
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
6 import os
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
7 import tempfile
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
8 import shutil
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
9 import subprocess
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
10 import glob
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
11 import argparse
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
12 from os.path import basename
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
13 import zipfile
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
14 import tarfile
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
15 import gzip
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
16 from galaxy.datatypes.checkers import *
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
17 from stacks import *
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
18
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
19
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
# File names ending in ".tsv".
_TSV_RE = re.compile(r"\.tsv$")
# File names ending in "_<optional digits>.tsv".
_NUMBERED_TSV_RE = re.compile(r"_[0-9]*\.tsv$")


def _is_result_file(path):
    """Return True when *path* is one of the files handed back to the user.

    A file is selected when its base name
      * starts with "batch" and does not end in ".tsv", or
      * ends in "_<optional digits>.tsv", or
      * contains "genotypes".
    """
    name = os.path.basename(path)
    if name.startswith('batch') and not _TSV_RE.search(name):
        return True
    if _NUMBERED_TSV_RE.search(name):
        return True
    return 'genotypes' in name


def _extend_if_set(cmd_line, flag, value):
    """Append ``flag value`` to *cmd_line* unless *value* is None.

    A None entry in the argument list would make both ' '.join() and
    subprocess.call() raise TypeError.
    """
    if value is not None:
        cmd_line.extend([flag, value])


def _build_cmd_line(options, workdir):
    """Return the ``genotypes`` argument list for the parsed *options*.

    *workdir* is passed to the program as its ``-P`` directory.
    """
    cmd_line = ["genotypes", "-b", options.b, "-P", workdir]

    if options.e:
        cmd_line.extend(["-e", options.e])
    # -c is a bare switch; the wrapper receives it as the string 'true'.
    if options.c == 'true':
        cmd_line.append("-c")
    if options.t:
        cmd_line.extend(["-t", options.t])
    if options.o:
        cmd_line.extend(["-o", options.o])

    # advanced options are only forwarded when explicitly activated
    if options.active_advanced == "true":
        _extend_if_set(cmd_line, "-r", options.r)
        _extend_if_set(cmd_line, "-m", options.m)
        if options.B:
            cmd_line.extend(["-B", options.B])
        if options.W:
            cmd_line.extend(["-W", options.W])

    # autocorrection thresholds are only forwarded when explicitly activated
    if options.active_autocorrect == "true":
        _extend_if_set(cmd_line, "--min_hom_seqs", options.min_hom_seqs)
        _extend_if_set(cmd_line, "--min_het_seqs", options.min_het_seqs)
        _extend_if_set(cmd_line, "--max_het_seqs", options.max_het_seqs)

    return cmd_line


def __main__():
    """Run ``genotypes`` on the supplied inputs and collect its outputs.

    Steps:
      1. Parse the command-line arguments.
      2. Create ``job_outputs`` and ``galaxy_outputs`` in the current
         directory and change into ``job_outputs``.
      3. Call ``extract_compress_files(options.P, <cwd>)`` (helper from the
         ``stacks`` module; presumably unpacks the inputs into the working
         directory -- confirm against that module).
      4. Build and run the ``genotypes`` command.
      5. Copy ``batch_1.haplotypes_1.tsv`` to ``--logfile``; if that fails,
         write an error to stderr and exit with status 1.
      6. Depending on ``--compress_output``: 'total' zips the result files
         and moves the archive to ``--total_output``; 'default' moves the
         result files into ``../galaxy_outputs``.
    """
    # arguments retrieval
    parser = argparse.ArgumentParser()
    for flag in ('-P', '-b', '-c', '-t', '-o', '-e',
                 '--active_advanced', '-r', '-m', '-B', '-W',
                 '--active_autocorrect',
                 '--min_hom_seqs', '--min_het_seqs', '--max_het_seqs'):
        parser.add_argument(flag)

    # multifile management
    parser.add_argument('--logfile')
    parser.add_argument('--compress_output')

    # additional outputs
    parser.add_argument('--total_output')

    options = parser.parse_args()

    # create the working dirs and run everything from job_outputs
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')
    os.chdir('job_outputs')

    # STACKS_archive: make the input files available in the working dir
    extract_compress_files(options.P, os.getcwd())

    cmd_line = _build_cmd_line(options, os.getcwd())

    # command with dependencies installed
    print("[CMD]:" + ' '.join(cmd_line))
    # flush so the echoed command appears before the child's own output
    sys.stdout.flush()
    subprocess.call(cmd_line)

    # postprocesses
    # NOTE(review): the file name is hard-coded and does not follow the
    # value given with -b -- confirm this is intended.
    try:
        shutil.copy('batch_1.haplotypes_1.tsv', options.logfile)
    except (IOError, OSError, shutil.Error):
        sys.stderr.write('Error in genotypes execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    # snapshot of the working dir, taken before any archive is created
    list_files = glob.glob('*')

    # if compress output is total: return one unique archive
    if options.compress_output == 'total':
        # The archive must be closed before it is moved: the zip central
        # directory is only written on close.
        with zipfile.ZipFile('total.zip.temp', 'w') as archive:
            for path in list_files:
                if _is_result_file(path):
                    archive.write(path, os.path.basename(path))

        shutil.move('total.zip.temp', options.total_output)

    # if compress output is default: expose each result file individually
    if options.compress_output == 'default':
        for path in list_files:
            if _is_result_file(path):
                shutil.move(path, '../galaxy_outputs')
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
138
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
139
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
142
d6ba40f6c824 first commit
cmonjeau
parents:
diff changeset
143