Mercurial > repos > crs4 > seal_galaxy
diff seal-galaxy-cc1b1911/seal/merge_alignments.py @ 0:244073d9abc1 draft default tip
Uploaded
author | crs4 |
---|---|
date | Wed, 15 Oct 2014 09:41:10 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/seal/merge_alignments.py Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +# Copyright (C) 2011-2014 CRS4. +# +# This file is part of Seal. +# +# Seal is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# Seal is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with Seal. If not, see <http://www.gnu.org/licenses/>. + + + +import os +import subprocess +import sys +import tempfile + +import hadoop_galaxy.pathset as pathset +import hadoop_galaxy.cat_paths as cat_paths + +def usage_error(msg=None): + if msg: + print >> sys.stderr, msg + print >> sys.stderr, os.path.basename(__file__), "INPUT_PATHSET OUTPUT [args...]" + sys.exit(1) + +def main(args): + if len(args) < 2: + usage_error() + + # We generate the header with seal_merge_alignments, insert it at the + # top of a copy of the input pathset, and then use cat_parts to + # join everything into a single file. + + input_pathset, output_path = map(os.path.abspath, args[0:2]) + + with tempfile.NamedTemporaryFile() as header_file: + print "generating header" + gen_header_cmd = [ 'seal', 'merge_alignments', '--header-only' ] + gen_header_cmd.extend(args[2:]) + header_text = subprocess.check_output(gen_header_cmd) + + header_file.write(header_text) + header_file.flush() + print "header ready" + print "generating new pathset" + + original_pathset = pathset.FilePathset.from_file(input_pathset) + new_pathset = pathset.FilePathset() + new_pathset.append(header_file.name) + for p in original_pathset: + new_pathset.append(p) + + with tempfile.NamedTemporaryFile() as temp_pathset: + new_pathset.write(temp_pathset) + temp_pathset.flush() + + print "concatenating pathset" + # TODO: Add ability to use dist_cat_paths + cat_paths.main([temp_pathset.name, output_path]) + print "operation complete" + +if __name__ == '__main__': + main(sys.argv[1:])