annotate seal-galaxy-cc1b1911/seal/merge_alignments.py @ 0:244073d9abc1 draft default tip

Uploaded
author crs4
date Wed, 15 Oct 2014 09:41:10 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
244073d9abc1 Uploaded
crs4
parents:
diff changeset
1 #!/usr/bin/env python
244073d9abc1 Uploaded
crs4
parents:
diff changeset
2
244073d9abc1 Uploaded
crs4
parents:
diff changeset
3 # Copyright (C) 2011-2014 CRS4.
244073d9abc1 Uploaded
crs4
parents:
diff changeset
4 #
244073d9abc1 Uploaded
crs4
parents:
diff changeset
5 # This file is part of Seal.
244073d9abc1 Uploaded
crs4
parents:
diff changeset
6 #
244073d9abc1 Uploaded
crs4
parents:
diff changeset
7 # Seal is free software: you can redistribute it and/or modify it
244073d9abc1 Uploaded
crs4
parents:
diff changeset
8 # under the terms of the GNU General Public License as published by the Free
244073d9abc1 Uploaded
crs4
parents:
diff changeset
9 # Software Foundation, either version 3 of the License, or (at your option)
244073d9abc1 Uploaded
crs4
parents:
diff changeset
10 # any later version.
244073d9abc1 Uploaded
crs4
parents:
diff changeset
11 #
244073d9abc1 Uploaded
crs4
parents:
diff changeset
12 # Seal is distributed in the hope that it will be useful, but
244073d9abc1 Uploaded
crs4
parents:
diff changeset
13 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
244073d9abc1 Uploaded
crs4
parents:
diff changeset
14 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
244073d9abc1 Uploaded
crs4
parents:
diff changeset
15 # for more details.
244073d9abc1 Uploaded
crs4
parents:
diff changeset
16 #
244073d9abc1 Uploaded
crs4
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License along
244073d9abc1 Uploaded
crs4
parents:
diff changeset
18 # with Seal. If not, see <http://www.gnu.org/licenses/>.
244073d9abc1 Uploaded
crs4
parents:
diff changeset
19
244073d9abc1 Uploaded
crs4
parents:
diff changeset
20
244073d9abc1 Uploaded
crs4
parents:
diff changeset
21
244073d9abc1 Uploaded
crs4
parents:
diff changeset
22 import os
244073d9abc1 Uploaded
crs4
parents:
diff changeset
23 import subprocess
244073d9abc1 Uploaded
crs4
parents:
diff changeset
24 import sys
244073d9abc1 Uploaded
crs4
parents:
diff changeset
25 import tempfile
244073d9abc1 Uploaded
crs4
parents:
diff changeset
26
244073d9abc1 Uploaded
crs4
parents:
diff changeset
27 import hadoop_galaxy.pathset as pathset
244073d9abc1 Uploaded
crs4
parents:
diff changeset
28 import hadoop_galaxy.cat_paths as cat_paths
244073d9abc1 Uploaded
crs4
parents:
diff changeset
29
244073d9abc1 Uploaded
crs4
parents:
diff changeset
def usage_error(msg=None):
    """Print an optional message and the usage line to stderr, then exit.

    msg: optional text written on its own line before the usage line;
         skipped when it is None or otherwise falsy.

    Always terminates the process by calling ``sys.exit(1)``.
    """
    if msg:
        # sys.stderr.write is used instead of the Python-2-only
        # ``print >> sys.stderr`` so the output is the same on 2 and 3.
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write(
        "%s INPUT_PATHSET OUTPUT [args...]\n" % os.path.basename(__file__))
    sys.exit(1)
244073d9abc1 Uploaded
crs4
parents:
diff changeset
35
244073d9abc1 Uploaded
crs4
parents:
diff changeset
def main(args):
    """Concatenate the parts of a pathset into one file, header first.

    args: command-line arguments without the program name:
          ``[INPUT_PATHSET, OUTPUT, extra...]``.  The first two are made
          absolute with ``os.path.abspath``; everything after them is
          passed unchanged to ``seal merge_alignments --header-only``.

    Calls ``usage_error()`` (which exits) when fewer than two arguments
    are given.  A failing ``seal`` command raises
    ``subprocess.CalledProcessError`` from ``check_output``.
    """
    if len(args) < 2:
        usage_error()

    # We generate the header with seal merge_alignments, insert it at the
    # top of a copy of the input pathset, and then use cat_paths to
    # join everything into a single file.

    input_pathset, output_path = [os.path.abspath(p) for p in args[:2]]

    # NamedTemporaryFile removes the file when it is closed, so the header
    # file has to stay open until the concatenation has finished.
    with tempfile.NamedTemporaryFile() as header_file:
        print("generating header")
        gen_header_cmd = ['seal', 'merge_alignments', '--header-only']
        gen_header_cmd.extend(args[2:])
        header_text = subprocess.check_output(gen_header_cmd)

        header_file.write(header_text)
        # Flush so the data is on disk before the file is re-read by name.
        header_file.flush()
        print("header ready")
        print("generating new pathset")

        original_pathset = pathset.FilePathset.from_file(input_pathset)
        new_pathset = pathset.FilePathset()
        # The header goes first so it ends up at the top of the output.
        new_pathset.append(header_file.name)
        for part in original_pathset:
            new_pathset.append(part)

        with tempfile.NamedTemporaryFile() as temp_pathset:
            new_pathset.write(temp_pathset)
            temp_pathset.flush()

            print("concatenating pathset")
            # TODO: Add ability to use dist_cat_paths
            cat_paths.main([temp_pathset.name, output_path])
            print("operation complete")
244073d9abc1 Uploaded
crs4
parents:
diff changeset
71
244073d9abc1 Uploaded
crs4
parents:
diff changeset
# Script entry point: pass the command-line arguments (without the
# program name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])