Mercurial > repos > crs4 > seal_galaxy
comparison seal-galaxy-cc1b1911/seal/demux_galaxy.py @ 0:244073d9abc1 draft default tip
Uploaded
author | crs4 |
---|---|
date | Wed, 15 Oct 2014 09:41:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:244073d9abc1 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 # Copyright (C) 2011-2014 CRS4. | |
4 # | |
5 # This file is part of Seal. | |
6 # | |
7 # Seal is free software: you can redistribute it and/or modify it | |
8 # under the terms of the GNU General Public License as published by the Free | |
9 # Software Foundation, either version 3 of the License, or (at your option) | |
10 # any later version. | |
11 # | |
12 # Seal is distributed in the hope that it will be useful, but | |
13 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 # for more details. | |
16 # | |
17 # You should have received a copy of the GNU General Public License along | |
18 # with Seal. If not, see <http://www.gnu.org/licenses/>. | |
19 | |
20 | |
21 | |
22 """ | |
23 Calls the Seal Demux tool. Then, it calls the custom galaxy integration script | |
24 split_demux_output.py to generate one Galaxy dataset per each sample extracted | |
25 by Demux. | |
26 """ | |
27 | |
# parameters (positional, in this order):
#     INPUT_DATA
#     MISMATCHES
#     NEW_FILE_PATH
#     NUM_REDUCERS
#     OUTPUT1
#     OUTPUT_ID
#     SAMPLE_SHEET
#     INPUT_FORMAT
#     OUTPUT_FORMAT
#     OUTPUT_COMPRESSION
#     INDEX_PRESENT
#     SEPARATE_READS
40 | |
41 import os | |
42 import re | |
43 import subprocess | |
44 import sys | |
45 | |
46 # XXX: add --append-python-path to the possible arguments? | |
47 | |
def parse_indexed(s):
    """
    Interpret an "index present" keyword.

    The comparison ignores case and surrounding whitespace.

    Returns True for 'indexed', False for 'notindexed', and None when ``s``
    is None or any other string (i.e. the value could not be parsed).
    """
    if s is None:
        return None
    keyword = s.lower().strip()
    known_keywords = {'indexed': True, 'notindexed': False}
    # dict.get yields None for anything unrecognized: failed to parse
    return known_keywords.get(keyword)
56 | |
def parse_index_present(param):
    """
    Determine whether the run has an index read.

    ``param`` is either a keyword understood by parse_indexed ('indexed' or
    'notindexed'), or the path of a file whose first line has the form
    ``<uri>TAB<keyword>``.

    Returns True or False when the answer could be determined, and None when
    ``param`` is neither a recognized keyword nor the path of an existing
    file.

    Raises RuntimeError when ``param`` names a file whose first line cannot
    be parsed (no tab separator, or an unrecognized keyword after it).
    """
    max_chars = 10000  # upper bound on how much of the first line is read

    is_indexed = parse_indexed(param)
    if is_indexed is None and os.path.isfile(param):
        # Not a keyword: try to read it as a file.
        with open(param) as f:
            contents = f.readline(max_chars)
        # partition() never raises; a line without a tab yields an empty
        # value, which is then reported through the RuntimeError below rather
        # than as an opaque tuple-unpacking ValueError.
        _uri, _sep, value = contents.partition("\t")
        is_indexed = parse_indexed(value)
        if is_indexed is None:
            raise RuntimeError(
                "Error determining whether run has an index read. "
                "Couldn't parse the dataset that was supposed to specify it "
                "(first %d chars): %s" % (max_chars, contents))
    return is_indexed
70 | |
def usage_error(msg=None):
    """
    Report a command-line usage error on stderr and exit with status 1.

    Writes "Usage error", then ``msg`` (when given and non-empty), then the
    usage line listing the expected positional arguments.  Never returns:
    it always calls sys.exit(1).
    """
    lines = ["Usage error"]
    if msg:
        lines.append("%s" % (msg,))
    lines.append(" ".join((
        "Usage:",
        os.path.basename(sys.argv[0]),
        "INPUT_DATA MISMATCHES NEW_FILE_PATH NUM_REDUCERS OUTPUT1 OUTPUT_ID SAMPLE_SHEET INPUT_FORMAT OUTPUT_FORMAT OUTPUT_COMPRESSION INDEX_PRESENT SEPARATE_READS",
    )))
    # sys.stderr.write produces the same bytes as the Python 2 print
    # statement did, and also works unchanged on Python 3.
    sys.stderr.write("\n".join(lines) + "\n")
    sys.exit(1)
78 | |
79 | |
def _build_demux_cmd(input_data, mismatches, num_reducers, output1,
                     sample_sheet, input_format, output_format,
                     output_compression, is_indexed, separate_reads):
    """
    Assemble the argument list that runs Seal Demux through hadoop_galaxy.

    ``is_indexed`` is the value returned by parse_index_present; only an
    explicit False adds --no-index.  Optional flags are added only when the
    corresponding parameter asks for non-default behaviour.

    Raises RuntimeError when ``separate_reads`` is neither 'separate-reads'
    nor a value starting with 'f' (compared case-insensitively, ignoring
    surrounding whitespace).
    """
    cmd = [
        'hadoop_galaxy',
        '--input', input_data,
        '--input-format', input_format,  # --input-format for hadoop-galaxy
        '--output', output1,
        '--executable', 'seal',
        'demux',
        '--sample-sheet', sample_sheet,
        '--input-format', input_format,  # --input-format for seal demux
        '--output-format', output_format,
    ]

    # Forward the reducer count only when the whole value is an integer.  The
    # pattern is anchored at the end so that values such as "12abc" are not
    # mistaken for numbers, and the surrounding whitespace is dropped.
    if re.match(r'\s*\d+\s*$', num_reducers):
        cmd.extend(('--num-reducers', num_reducers.strip()))

    if output_compression.lower() != 'none':
        cmd.extend(('--compress-output', output_compression))

    if mismatches != '0':
        cmd.extend(('--mismatches', mismatches))

    if is_indexed is False:
        cmd.append("--no-index")

    norm_separate_reads = separate_reads.lower().strip()
    if norm_separate_reads == 'separate-reads':
        cmd.append("--separate-reads")
    elif not norm_separate_reads.startswith('f'):
        raise RuntimeError("Unrecognized value for separate-reads parameter: '%s'" % separate_reads)

    return cmd


def _run_logged(cmd):
    """Echo ``cmd`` on stderr, then run it; raises CalledProcessError on failure."""
    sys.stderr.write(' '.join(cmd) + "\n")
    sys.stderr.flush()  # keep our log line ahead of the child's own output
    subprocess.check_call(cmd)


def main(argv):
    """
    Run Seal Demux, then split its output into one Galaxy dataset per sample.

    ``argv`` is the full argument vector (program name followed by the twelve
    positional parameters named in the usage message).  Exits through
    usage_error when the argument count is wrong.
    """
    if len(argv) != 13:
        usage_error()

    (input_data, mismatches, new_file_path, num_reducers, output1, output_id,
     sample_sheet, input_format, output_format, output_compression,
     index_present, separate_reads) = argv[1:13]

    mydir = os.path.abspath(os.path.dirname(__file__))

    # First phase: run the demux program.
    is_indexed = parse_index_present(index_present)
    _run_logged(_build_demux_cmd(
        input_data, mismatches, num_reducers, output1, sample_sheet,
        input_format, output_format, output_compression, is_indexed,
        separate_reads))

    # Second phase: split_demux_output.py, located next to this script.
    _run_logged([
        os.path.join(mydir, 'split_demux_output.py'),
        output_id, output1, new_file_path,
    ])


if __name__ == "__main__":
    main(sys.argv)