Mercurial > repos > crs4 > seal_galaxy
changeset 0:244073d9abc1 draft default tip
Uploaded
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/README.md Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,73 @@ + +Galaxy wrapper for the Seal toolkit +==================================== + +These are the Galaxy wrappers for the Seal toolkit for Hadoop-based processing +of sequencing data (http://biodoop-seal.sf.net). + + +Installation +------------------- + +You can install the Seal-Galaxy wrappers through the Galaxy toolshed or like +any other Galaxy tool. The installation process will try to fetch and build +Seal and some of its dependencies. However, you'll need to make sure that +the build process can find any required headers, libraries and executables, +such as: + +* javac +* protobuf +* maven +* ant +* zlib +* git +* hadoop + +For details on Seal's installation process refer directly to [its +documentation](http://biodoop-seal.sourceforge.net/installation.html). + +Hadoop-Galaxy integration +---------------------------- + +These wrappers use the [Hadoop-Galaxy](https://github.com/crs4/hadoop-galaxy) +tool to implement the integration between Hadoop and Galaxy. You should have a +look at its documentation. + +An important issue +----------------------- + +An implication of the integration provided by Hadoop-Galaxy is that Galaxy +knows nothing about your actual data. Because of this, removing the Galaxy +datasets does not delete the files produced by your Hadoop runs, potentially +resulting in the waste of a lot of space. Also, be careful with situations +where you may end up with multiple pathsets pointing to the same data, or where +they point to data that you want to access from Hadoop but would not want to +delete (e.g., your run directories). + +Have a look at the Hadoop-Galaxy README for more details. + + +Authors +------------- + +Luca Pireddu <pireddu@crs4.it> + + +Support +------------- + +No support is provided. + + + +License +-------------- + +This code is released under the GPLv3. 
+ + + +Copyright +-------------- + +Copyright CRS4, 2011-2014.
#!/bin/bash
#
# seal-galaxy-cc1b1911/make_release.sh
#
# Cuts a release of the Seal Galaxy wrappers.  Given one version argument it:
#   1. rewrites the Seal version in tool_dependencies.xml and seal/*.xml;
#   2. commits the result and tags the commit with the version argument;
#   3. writes a tar.gz archive of HEAD named after the short commit id.
#
# Usage: make_release.sh version
#   version is a git revid (id or tag) of the Seal repository, optionally
#   followed by a '-n' wrapper suffix (e.g., 0.4.1, 0.4.1-1, 0.4.1-2).
#
# Must be run from the root of the wrapper repository (the paths
# tool_dependencies.xml and seal/*.xml are relative to the current directory).

#set -x
set -o errexit
set -o nounset
set -o pipefail

readonly PackageName="seal-galaxy"

# Print the arguments (if any) to stderr and exit with status 1.
function error() {
  if [ $# -ge 1 ]; then
    printf '%s\n' "$*" >&2
  fi
  exit 1
}

# Print an optional message followed by the usage text to stderr, then exit 1.
function usage_error() {
  if [ $# -ge 1 ]; then
    printf '%s\n' "$*" >&2
  fi
  {
    echo "Usage: $0 version"
    echo "Specify version as a git revid (id or tag) for the Seal repository, and "
    echo "optionally a '-n' suffix for the wrapper version; e.g., 0.4.1, 0.4.1-1, 0.4.1-2"
  } >&2
  error
}

# Ask the user a yes/no question.
# Arguments: $1 - the question (the " [Y/n]" hint is appended here).
# Returns 0 on yes (or empty answer); exits 0 on no; exits 1 otherwise.
function confirm() {
  local prompt="${1}"
  local yn
  echo "${prompt} [Y/n]"
  read -r -p "Answer: " yn
  case "${yn}" in
    '' | [Yy])
      # do nothing and keep going
      ;;
    [Nn])
      echo "Aborting"
      exit 0
      ;;
    *)
      usage_error "Unrecognized answer. Please specify Y or n"
      ;;
  esac
  return 0
}

# Escape a string so it can be used verbatim as the replacement part of a
# sed 's/.../.../' command (protects '\', '/' and '&').
# Arguments: $1 - raw text.  Outputs: escaped text on stdout.
function sed_escape_replacement() {
  printf '%s\n' "${1}" | sed -e 's|[\\/&]|\\&|g'
}

# Rewrite the Seal version in tool_dependencies.xml and in every tool
# definition under seal/.
# Arguments: $1 - the Seal version (git revid) to write.
function rewrite_seal_version() {
  local version="${1}"
  local escaped
  escaped=$(sed_escape_replacement "${version}")

  local grep_expr='<package name="seal" version=".*">'
  if ! grep -q -- "${grep_expr}" tool_dependencies.xml; then
    error "Couldn't find expected package line in tool_dependencies.xml"
  fi

  # The sed programs are assembled by plain concatenation so that the regex
  # backslashes never pass through a printf format string.
  local pkg_expr='/<package name="seal"/s/version="[^"]*"/version="'"${escaped}"'"/'
  local reset_expr='/<action type="shell_command">/s/git reset --hard \([^<]\+\)\s*/git reset --hard '"${escaped}"'/'
  sed -i -e "${pkg_expr}" -e "${reset_expr}" tool_dependencies.xml
  echo "Edited tool_dependencies.xml" >&2

  # edit the tools as well
  local req_expr='/<requirement type="package" version=.*>\s*seal\s*</s/version="[^"]\+"/version="'"${escaped}"'"/'
  local tool_expr='/<tool id=/s/version="[^"]\+"/version="'"${escaped}"'"/'
  sed -i -e "${req_expr}" -e "${tool_expr}" seal/*.xml

  echo "Edited tool definitions" >&2
}

############# main ###############

if [ $# -eq 1 ]; then
  wrapper_version="${1}"
else
  usage_error
fi

echo "Will rewrite tool_dependencies.xml setting the package version to '${wrapper_version}'."
confirm "Are you sure you want to proceed?"

# ensure the tag doesn't already exist (exact ref lookup, so that e.g.
# '0.4.1' is not rejected merely because '0.4.1-1' exists)
if git rev-parse --quiet --verify "refs/tags/${wrapper_version}" >/dev/null; then
  error "A release tag called '${wrapper_version}' already exists"
fi

# remove the wrapper suffix, if it's there
seal_version=$(printf '%s\n' "${wrapper_version}" | sed -e 's/-[^-]\+$//')
echo "Using seal version ${seal_version}"

rewrite_seal_version "${seal_version}"

git commit -a --allow-empty -m "Wrappers release for Seal '${seal_version}'"
git tag "${wrapper_version}"

revid=$(git rev-parse HEAD)

echo "Tagged new commit ${revid} with tag '${wrapper_version}'"

short_revid=${revid::8}
archive_name="${PackageName}-${short_revid}.tar.gz"

git archive --format tar.gz --prefix "${PackageName}-${short_revid}/" -o "${archive_name}" HEAD

echo "Don't forget to upload the archive to the toolshed!"

<!--
  Copyright (C) 2011-2014 CRS4.

  This file is part of Seal.

  Seal is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  Seal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with Seal.  If not, see <http://www.gnu.org/licenses/>.
-->

<!--
  seal-galaxy-cc1b1911/seal/bcl2qseq.xml

  Galaxy tool definition that runs "seal bcl2qseq" through the hadoop_galaxy
  launcher.  Input and output are pathset datasets.  The optional flags are
  only added to the command line when the "Advanced controls" selector is set
  to "show".  Free-text values are double-quoted on the command line so that
  paths containing spaces are passed as a single argument.
-->
<tool id="dist_bcl2qseq" name="Dist Bcl2Qseq" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">
  <description>Convert Illumina bcl files to qseq on Hadoop</description>
  <requirements>
    <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.3">hadoop-galaxy</requirement>
  </requirements>

  <command>
    hadoop_galaxy
    --executable seal
    --input $input_data
    --output $output1
    bcl2qseq
    #if $advanced.control == 'show'
      #if $advanced.bcl2qseq_bin:
        --bclToQseq-path "$advanced.bcl2qseq_bin"
      #end if

      #if $advanced.additional_ld_path
        --append-ld-library-path "$advanced.additional_ld_path"
      #end if

      #if $advanced.ignore_missing_bcl
        --ignore-missing-bcl
      #end if

      #if $advanced.ignore_missing_control
        --ignore-missing-control
      #end if

      #if $advanced.exclude_controls
        --exclude-controls
      #end if

      #if $advanced.no_eamss
        --no-eamss
      #end if
    #end if
  </command>

  <inputs>
    <param name="input_data" type="data" format="pathset" label="Source data set"/>
    <conditional name="advanced">
      <param name="control" type="select" label="Advanced controls" default="hide">
        <option value="hide">Hide</option>
        <option value="show">Show</option>
      </param>
      <when value="show">
        <param name="ignore_missing_bcl"
          type="boolean" default="false"
          label="Interpret missing *.bcl files as a base calling of '.'"
          />
        <param name="ignore_missing_control"
          type="boolean" default="false"
          label="Don't throw an error when *.control files are missing"
          />
        <param name="exclude_controls"
          type="boolean" default="false"
          label="Do not include clusters that are used as controls"
          />
        <param name="no_eamss"
          type="boolean" default="false"
          label="Do not apply the EAMSS masking on the quality values"
          />
        <param name="bcl2qseq_bin"
          type="text"
          default=""
          size="80"
          label="Full path to bclToQseq binary (needed only if the executable isn't in the PATH)"
          />
        <param name="additional_ld_path"
          type="text" default="" size="80"
          label="paths to append to the value of LD_LIBRARY_PATH"
          />
      </when>
    </conditional>
  </inputs>

  <outputs>
    <data name="output1" format="pathset" label="Qseq" />
  </outputs>

  <!-- Any non-zero exit code from the command is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
    This is a Pydoop-based distributed version of Illumina's bclToQseq tool.
  </help>
</tool>

<!--
  Copyright (C) 2011-2014 CRS4.

  This file is part of Seal.

  Seal is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  Seal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with Seal.  If not, see <http://www.gnu.org/licenses/>.
-->

<!--
  seal-galaxy-cc1b1911/seal/demux.xml

  Galaxy tool definition for Seal Demux.  The command line is a fixed list of
  twelve positional arguments consumed by demux_galaxy.py, in this order:
    INPUT_DATA MISMATCHES NEW_FILE_PATH NUM_REDUCERS OUTPUT1 OUTPUT_ID
    SAMPLE_SHEET INPUT_FORMAT OUTPUT_FORMAT OUTPUT_COMPRESSION INDEX_PRESENT
    SEPARATE_READS

  INDEX_PRESENT is sent as "indexed" / "notindexed" (the tokens parsed by
  demux_galaxy.py) or, in the "dynamic" case, as the path of the dataset
  chosen inside the "index" conditional.
-->
<tool id="seal_demux" name="Demux" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4" force_history_refresh="True">
  <description>Demultiplex Illumina runs on Hadoop</description>
  <requirements>
    <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.3">hadoop-galaxy</requirement>
  </requirements>

  <command interpreter="python">
    demux_galaxy.py
    $input_data
    $mismatches
    $__new_file_path__
    #if $num_reducers
      $num_reducers
    #else
      null
    #end if
    $output1
    $output1.id
    $sample_sheet
    $input_format
    $output_format
    $output_compression
    #if $index.specify_index == 'present'
      indexed
    #else if $index.specify_index == 'not_present'
      notindexed
    #else if $index.specify_index == 'dynamic'
      $index.index_present
    #else
      #raise ValueError('Invalid index value!')
    #end if
    $separate_reads
  </command>

  <inputs>
    <param name="input_data" type="data" format="pathset" label="Source data set"/>
    <param name="sample_sheet" type="data" format="csv" label="Sample sheet" />

    <conditional name="index">
      <param name="specify_index" type="select" label="Index read" default="present">
        <option value="present">Present</option>
        <option value="not_present">Not present</option>
        <option value="dynamic">Determine at runtime</option>
      </param>
      <when value="dynamic">
        <param name="index_present" type="data" />
      </when>
    </conditional>

    <param name="mismatches"
      label="Barcode base mismatch limit"
      type="integer"
      value="0"
      min="0"
      max="3" />
    <param name="num_reducers"
      label="Number of reduce tasks"
      type="integer"
      value="90"
      min="1"
      optional="true"
      />
    <param name="input_format" type="select" label="Input data format" default="qseq">
      <option value="qseq">Qseq</option>
      <option value="fastq">Fastq</option>
    </param>
    <param name="output_format" type="select" label="Output data format" default="qseq">
      <option value="qseq">Qseq</option>
      <option value="fastq">Fastq</option>
    </param>
    <param name="output_compression" type="select" label="Output compression" default="none">
      <option value="none">None</option>
      <option value="gzip">Gzip</option>
      <option value="bzip2">Bzip2</option>
    </param>

    <param name="separate_reads" type="boolean"
      label="Separate reads by read number"
      default="false"
      truevalue="separate-reads"
      />
  </inputs>

  <outputs>
    <data name="output1" format="pathset" label="Demuxed" />
  </outputs>

  <!-- Any non-zero exit code from the command is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
    Demux is a Hadoop utility to demultiplex data from multiplexed Illumina runs.
  </help>
</tool>
#!/usr/bin/env python

# Copyright (C) 2011-2014 CRS4.
#
# This file is part of Seal.
#
# Seal is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# Seal is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with Seal.  If not, see <http://www.gnu.org/licenses/>.



"""
seal-galaxy-cc1b1911/seal/demux_galaxy.py

Calls the Seal Demux tool.  Then, it calls the custom galaxy integration script
split_demux_output.py to generate one Galaxy dataset per each sample extracted
by Demux.
"""

# parameters (positional, in this order):
#     INPUT_DATA
#     MISMATCHES
#     NEW_FILE_PATH
#     NUM_REDUCERS
#     OUTPUT1
#     OUTPUT_ID
#     SAMPLE_SHEET
#     INPUT_FORMAT
#     OUTPUT_FORMAT
#     OUTPUT_COMPRESSION
#     INDEX_PRESENT
#     SEPARATE_READS

import os
import re
import subprocess
import sys

# XXX: add --append-python-path to the possible arguments?

# Maximum number of characters read from a dataset that specifies whether the
# run has an index read.
_MAX_INDEX_LINE_CHARS = 10000

# Accepted spellings of the index flag.  'indexed'/'notindexed' are the
# original tokens; 'true'/'false' are what the demux.xml tool definition
# passes for its "present"/"not_present" choices.
_INDEX_TOKENS = {
    'indexed': True,
    'true': True,
    'notindexed': False,
    'false': False,
}


def parse_indexed(s):
    """
    Interpret `s` as an index flag.

    Returns True or False for a recognized token (case and surrounding
    whitespace are ignored) and None when `s` is None or unrecognized.
    """
    if s is None:
        return None
    return _INDEX_TOKENS.get(s.lower().strip())


def parse_index_present(param):
    """
    Determine whether the run has an index read.

    `param` is either a flag token understood by parse_indexed, or the path
    of a file whose first line has the form "<uri>TAB<flag>".

    Returns True or False.  Raises RuntimeError when the value cannot be
    determined.
    """
    is_indexed = parse_indexed(param)
    if is_indexed is not None:
        return is_indexed

    # not a literal flag: try to read it as a file
    contents = None
    if os.path.isfile(param):
        with open(param) as f:
            contents = f.readline(_MAX_INDEX_LINE_CHARS)
        fields = contents.split("\t", 1)
        if len(fields) == 2:
            is_indexed = parse_indexed(fields[1])

    if is_indexed is None:
        if contents is None:
            detail = "'%s' is neither a recognized value nor an existing file" % param
        else:
            detail = ("Couldn't parse the dataset that was supposed to specify it "
                      "(first %d chars): %s" % (_MAX_INDEX_LINE_CHARS, contents))
        raise RuntimeError("Error determining whether run has an index read.  " + detail)
    return is_indexed


def usage_error(msg=None):
    """Write a usage message (preceded by `msg`, if given) to stderr; exit 1."""
    sys.stderr.write("Usage error\n")
    if msg:
        sys.stderr.write("%s\n" % msg)
    sys.stderr.write("Usage: %s %s\n" % (
        os.path.basename(sys.argv[0]),
        "INPUT_DATA MISMATCHES NEW_FILE_PATH NUM_REDUCERS OUTPUT1 OUTPUT_ID SAMPLE_SHEET INPUT_FORMAT OUTPUT_FORMAT OUTPUT_COMPRESSION INDEX_PRESENT SEPARATE_READS"))
    sys.exit(1)


def build_demux_command(input_data, mismatches, num_reducers, output1,
                        sample_sheet, input_format, output_format,
                        output_compression, index_present, separate_reads):
    """
    Build the argument list that runs "seal demux" through hadoop_galaxy.

    All arguments are strings as received from the command line.  Raises
    RuntimeError for an unparseable index flag or separate-reads value.
    """
    cmd = [
        'hadoop_galaxy',
        '--input', input_data,
        '--input-format', input_format,  # --input-format for hadoop-galaxy
        '--output', output1,
        '--executable', 'seal',
        'demux',
        '--sample-sheet', sample_sheet,
        '--input-format', input_format,  # --input-format for seal demux
        '--output-format', output_format
    ]
    # Only a value that is entirely an integer selects the reducer count
    # (the tool definition sends the word "null" when none was given).
    if re.match(r'\s*\d+\s*$', num_reducers):
        cmd.extend(('--num-reducers', num_reducers.strip()))

    if output_compression.lower() != 'none':
        cmd.extend(('--compress-output', output_compression))

    if mismatches != '0':
        cmd.extend(('--mismatches', mismatches))

    if parse_index_present(index_present) is False:
        cmd.append("--no-index")

    norm_separate_reads = separate_reads.lower().strip()
    if norm_separate_reads == 'separate-reads':
        cmd.append("--separate-reads")
    elif norm_separate_reads.startswith('f'):
        pass  # "false": keep the default behaviour
    else:
        raise RuntimeError("Unrecognized value for separate-reads parameter:  '%s'" % separate_reads)
    return cmd


def main(argv):
    """Run demux, then split_demux_output.py.  `argv` is the full sys.argv."""
    if len(argv) != 13:
        usage_error()

    (input_data, mismatches, new_file_path, num_reducers, output1, output_id,
     sample_sheet, input_format, output_format, output_compression,
     index_present, separate_reads) = argv[1:13]

    mydir = os.path.abspath(os.path.dirname(__file__))

    # Run the demux program
    cmd = build_demux_command(
        input_data, mismatches, num_reducers, output1, sample_sheet,
        input_format, output_format, output_compression, index_present,
        separate_reads)
    sys.stderr.write(' '.join(cmd) + '\n')
    subprocess.check_call(cmd)

    ###
    # now the second phase: split_demux_output.py
    cmd = [
        os.path.join(mydir, 'split_demux_output.py'),
        output_id, output1, new_file_path]
    sys.stderr.write(' '.join(cmd) + '\n')
    subprocess.check_call(cmd)


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/env python

# Copyright (C) 2011-2014 CRS4.
#
# This file is part of Seal.
#
# Seal is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# Seal is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with Seal.  If not, see <http://www.gnu.org/licenses/>.



# seal-galaxy-cc1b1911/seal/generate_sam_header.py
#
# A really really thin wrapper.  We only seem to need it because Galaxy won't
# search for the command in the PATH
#
# Usage: generate_sam_header.py [seal merge_alignments args...] OUTPUT_PATH
# The last argument is the output file; everything before it is forwarded
# unchanged to "seal merge_alignments".

import errno
import os
import subprocess
import sys


def remove_if_exists(path):
    """
    Delete `path`, doing nothing when it does not exist.

    os.remove reports failures as OSError; only "no such file" is ignored,
    any other failure is re-raised.
    """
    try:
        os.remove(path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise


def build_command(args):
    """
    Build the "seal merge_alignments" argument list.

    `args` is the wrapper's argument list (without the program name); its
    last element is the output path, which is rewritten as an absolute
    file:// URI.
    """
    hadoopized_output_path = 'file://' + os.path.abspath(args[-1])
    return ['seal', 'merge_alignments'] + list(args[:-1]) + [hadoopized_output_path]


def main(args):
    """Entry point; `args` is sys.argv[1:]."""
    if not args:
        # Without this guard the "output path" would be the script itself,
        # which would then be deleted below.
        sys.stderr.write("Usage: %s [seal merge_alignments args...] OUTPUT_PATH\n"
                         % os.path.basename(sys.argv[0]))
        sys.exit(1)

    # seal merge_alignments won't overwrite an existing file, so we first
    # remove the file Galaxy creates for us.
    remove_if_exists(args[-1])

    cmd = build_command(args)
    sys.stdout.write("running command: %s\n" % str(cmd))
    sys.stdout.flush()  # keep our message ahead of the child's output
    subprocess.check_call(cmd)


if __name__ == '__main__':
    main(sys.argv[1:])

<!--
  Copyright (C) 2011-2014 CRS4.

  This file is part of Seal.

  Seal is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  Seal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with Seal.  If not, see <http://www.gnu.org/licenses/>.
-->

<!--
  seal-galaxy-cc1b1911/seal/generate_sam_header.xml

  Galaxy tool definition that calls generate_sam_header.py, which in turn runs
  "seal merge_alignments" with the header-only flag.  The reference is chosen
  from the bwa_0510_indexes data table; absolute reference paths are turned
  into file:// URIs before being passed on.  The output dataset path must be
  the last argument on the command line.
-->
<tool id="generate_sam_header" name="Generate SAM header" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">
  <description>Generate a SAM header for the given reference</description>
  <requirements>
    <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.3">hadoop-galaxy</requirement>
  </requirements>

  <command interpreter="python">
    #set $ref_path = 'file://' + $reference.fields.path if $reference.fields.path.startswith('/') else $reference.fields.path
    generate_sam_header.py
    --header-only
    --annotations ${ref_path}.ann
    --sort-order $sort_order

    #if $compute_md5:
      --md5
    #end if

    #if $assembly:
      --sq-assembly "$assembly"
    #end if

    #if $rg.set_rg == 'true':
      --rg_cn "$rg.rg_cn"
      --rg_dt "$rg.rg_dt"
      --rg_id "$rg.rg_id"
      --rg_lb "$rg.rg_lb"
      --rg_pl "$rg.rg_pl"
      --rg_pu "$rg.rg_pu"
      --rg_sm "$rg.rg_sm"
    #end if

    ${output}
  </command>

  <inputs>
    <param name="reference" type="select" label="Reference (should be the same one used for alignment)">
      <options from_data_table="bwa_0510_indexes" />
    </param>

    <param name="sort_order" type="select" default="coordinate">
      <option value="coordinate">Coordinate</option>
      <option value="read_id">Read ID</option>
      <option value="unsorted">Unsorted</option>
    </param>

    <param name="compute_md5" type="boolean" checked="false" label="Whether to compute the MD5 checksums of the reference contigs" />
    <param name="assembly" type="text" label="Genome assembly identifier (@SQ AS:XXX tag)" />

    <conditional name="rg">
      <param name="set_rg" type="boolean" checked="false" label="Set a Read Group line" truevalue="true" falsevalue="false" />

      <when value="true">
        <param name="rg_cn" type="text" label="Read group center" />
        <param name="rg_dt" type="text" label="Read group date" />
        <param name="rg_id" type="text" label="Read group id" />
        <param name="rg_lb" type="text" label="Read group library" />
        <param name="rg_pl" type="text" label="Read group platform" />
        <param name="rg_pu" type="text" label="Read group platform unit" />
        <param name="rg_sm" type="text" label="Read group sample" />
      </when>
    </conditional>
  </inputs>

  <outputs>
    <data name="output" format="sam" />
  </outputs>

  <!-- Any non-zero exit code from the command is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
Generate SAM header runs Seal's merge_alignments program in header-only mode
to produce a SAM header for the selected reference, optionally including MD5
checksums of the contigs, an assembly identifier and a read group line.
For more information see the `Seal documentation <http://biodoop-seal.sourceforge.net/>`_.
  </help>

</tool>
#!/usr/bin/env python

# Copyright (C) 2011-2014 CRS4.
#
# This file is part of Seal.
#
# Seal is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# Seal is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with Seal.  If not, see <http://www.gnu.org/licenses/>.



# seal-galaxy-cc1b1911/seal/merge_alignments.py
#
# Usage: merge_alignments.py INPUT_PATHSET OUTPUT [args...]
# Any extra arguments are forwarded to "seal merge_alignments --header-only".

import os
import subprocess
import sys
import tempfile

import hadoop_galaxy.pathset as pathset
import hadoop_galaxy.cat_paths as cat_paths


def _log(message):
    """Write a progress message to stdout (works on Python 2 and 3)."""
    sys.stdout.write(message + "\n")


def usage_error(msg=None):
    """Write `msg` (if given) and the usage line to stderr, then exit 1."""
    if msg:
        sys.stderr.write("%s\n" % msg)
    sys.stderr.write("%s INPUT_PATHSET OUTPUT [args...]\n" % os.path.basename(__file__))
    sys.exit(1)


def main(args):
    """
    Merge the part-files listed in a pathset into one file with a header.

    `args` is [INPUT_PATHSET, OUTPUT, extra seal arguments...].
    """
    if len(args) < 2:
        usage_error()

    # We generate the header with "seal merge_alignments --header-only",
    # insert it at the top of a copy of the input pathset, and then use
    # cat_paths to join everything into a single file.

    input_pathset, output_path = map(os.path.abspath, args[0:2])

    # Both temporary files must stay open (and therefore on disk) until
    # cat_paths has finished reading them, hence the nested "with" blocks.
    with tempfile.NamedTemporaryFile() as header_file:
        _log("generating header")
        gen_header_cmd = ['seal', 'merge_alignments', '--header-only']
        gen_header_cmd.extend(args[2:])
        header_text = subprocess.check_output(gen_header_cmd)

        header_file.write(header_text)
        header_file.flush()
        _log("header ready")
        _log("generating new pathset")

        original_pathset = pathset.FilePathset.from_file(input_pathset)
        new_pathset = pathset.FilePathset()
        # the header goes first so that it ends up at the top of the output
        new_pathset.append(header_file.name)
        for p in original_pathset:
            new_pathset.append(p)

        with tempfile.NamedTemporaryFile() as temp_pathset:
            new_pathset.write(temp_pathset)
            temp_pathset.flush()

            _log("concatenating pathset")
            # TODO: Add ability to use dist_cat_paths
            cat_paths.main([temp_pathset.name, output_path])
            _log("operation complete")


if __name__ == '__main__':
    main(sys.argv[1:])

<!--
  Copyright (C) 2011-2014 CRS4.

  This file is part of Seal.

  Seal is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  Seal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with Seal.  If not, see <http://www.gnu.org/licenses/>.
-->

<!--
  seal-galaxy-cc1b1911/seal/merge_alignments.xml

  Galaxy tool definition that calls merge_alignments.py.  The first two
  positional arguments are the input pathset and the output dataset; all the
  remaining options are forwarded by the script to "seal merge_alignments".
  Absolute reference paths are turned into file:// URIs before being passed on.
-->
<tool id="seal_merge_alignments" name="Merge Alignments" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">
  <description>Merge a pathset of part-files of alignments into a single well-formatted SAM file</description>
  <requirements>
    <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.3">hadoop-galaxy</requirement>
  </requirements>
  <command interpreter="python">
    #set $ref_path = 'file://' + $reference.fields.path if $reference.fields.path.startswith('/') else $reference.fields.path
    merge_alignments.py
    $input_data
    $output

    --annotations ${ref_path}.ann
    --sort-order $sort_order

    #if $compute_md5:
      --md5
    #end if

    #if $assembly:
      --sq-assembly "$assembly"
    #end if

    #if $rg.set_rg == 'true':
      --rg_cn "$rg.rg_cn"
      --rg_dt "$rg.rg_dt"
      --rg_id "$rg.rg_id"
      --rg_lb "$rg.rg_lb"
      --rg_pl "$rg.rg_pl"
      --rg_pu "$rg.rg_pu"
      --rg_sm "$rg.rg_sm"
    #end if
  </command>

  <inputs>
    <param name="input_data" type="data" format="pathset" label="Input data" />

    <param name="reference" type="select" label="Reference (should be the same one used for alignment)">
      <options from_data_table="bwa_0510_indexes" />
    </param>

    <param name="sort_order" type="select" default="coordinate">
      <option value="coordinate">Coordinate</option>
      <option value="read_id">Read ID</option>
      <option value="unsorted">Unsorted</option>
    </param>
    <param name="compute_md5" type="boolean" checked="false" label="Whether to compute the MD5 checksums of the reference contigs" />
    <param name="assembly" type="text" label="Genome assembly identifier (@SQ AS:XXX tag)" />

    <conditional name="rg">
      <param name="set_rg" type="boolean" checked="false" label="Set a Read Group line" truevalue="true" falsevalue="false" />

      <when value="true">
        <param name="rg_cn" type="text" label="Read group center" />
        <param name="rg_dt" type="text" label="Read group date" />
        <param name="rg_id" type="text" label="Read group id" />
        <param name="rg_lb" type="text" label="Read group library" />
        <param name="rg_pl" type="text" label="Read group platform" />
        <param name="rg_pu" type="text" label="Read group platform unit" />
        <param name="rg_sm" type="text" label="Read group sample" />
      </when>
    </conditional>
  </inputs>

  <outputs>
    <data name="output" format="sam" />
  </outputs>

  <!-- Any non-zero exit code from the command is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
Merge Alignments joins the part-files listed in the input pathset into a
single SAM file, placing at its top a header generated by Seal's
merge_alignments program for the selected reference.
For more information see the `Seal documentation <http://biodoop-seal.sourceforge.net/>`_.
  </help>

</tool>

<!--
  Copyright (C) 2011-2014 CRS4.

  This file is part of Seal.

  Seal is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  Seal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with Seal.  If not, see <http://www.gnu.org/licenses/>.
-->

<!--
  seal-galaxy-cc1b1911/seal/prq.xml

  Galaxy tool definition that runs "seal prq" through the hadoop_galaxy
  launcher.

  The three prq-specific properties are always passed explicitly.  Guarding
  them with "#if $param" would silently drop a value of 0 for the minimum
  bases per read, or an unchecked boolean, because those values evaluate as
  false in the template; the choice made in the form would then never reach
  Seal.
-->
<tool id="seal_prq" name="Prq" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">
  <description>Convert qseq or fastq files to prq on Hadoop</description>
  <requirements>
    <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.3">hadoop-galaxy</requirement>
  </requirements>
  <command>
    hadoop_galaxy
    --input $input_data
    --input-format $input_format.type
    --output $output1
    --executable seal
    prq
    --input-format $input_format.type
    --num-reducers $num_reducers
    -D hbam.qseq-input.base-quality-encoding=$input_format.bq_encoding
    -D hbam.fastq-input.base-quality-encoding=$input_format.bq_encoding
    -D seal.prq.min-bases-per-read=$bpr
    -D seal.prq.drop-failed-filter=$drop_failed
    -D seal.prq.warning-only-if-unpaired=$warn_unpaired
  </command>

  <inputs>
    <param name="input_data" type="data" format="pathset" label="Input data" />
    <conditional name="input_format">
      <!-- We use a conditional for the input_format since we want a different
           default base quality encoding value for each of the respective
           supported formats, qseq and fastq.-->
      <param name="type" type="select" label="Input format" default="qseq">
        <option value="qseq" />
        <option value="fastq" />
      </param>
      <when value="qseq">
        <param name="bq_encoding" type="select" label="BQ encoding" default="illumina">
          <option value="illumina">Illumina</option>
          <option value="sanger">Sanger</option>
        </param>
      </when>
      <when value="fastq">
        <param name="bq_encoding" type="select" label="BQ encoding" default="sanger">
          <option value="sanger">Sanger</option>
          <option value="illumina">Illumina</option>
        </param>
      </when>
    </conditional>

    <param name="num_reducers"
      label="Number of reduce tasks"
      type="integer"
      value="90"
      min="1"
      />

    <!-- prq-specific parameters -->
    <param name="bpr"
      label="Min bases per read"
      type="integer"
      help="If neither read in a pair has at least this many known bases the pair is dropped (prop: seal.prq.min-bases-per-read)."
      value="30"
      min="0"
      />
    <param name="drop_failed"
      label="Filter by machine quality check"
      type="boolean"
      help="Drop pairs if both reads failed machine quality checks (prop: seal.prq.drop-failed-filter)."
      checked="true"
      />
    <param name="warn_unpaired"
      label="Warn only on unpaired reads"
      type="boolean"
      help="PRQ normally gives an error if it finds an unpaired read. If this setting is checked it will instead emit a warning, drop the unpaired read and keep going (prop: seal.prq.warning-only-if-unpaired)."
      checked="false"
      />
  </inputs>

  <outputs>
    <data name="output1" format="pathset" />
  </outputs>

  <!-- Any non-zero exit code from the command is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
PairReadsQSeq (PRQ) is a Hadoop utility to convert Illumina qseq files into
prq file format.  For the full help see the `manual <http://biodoop-seal.sourceforge.net/prq_index.html>`_.
  </help>

</tool>

<!--
  Copyright (C) 2011-2014 CRS4.

  This file is part of Seal.

  Seal is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  Seal is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.

  You should have received a copy of the GNU General Public License along
  with Seal.  If not, see <http://www.gnu.org/licenses/>.
-->

<!--
  seal-galaxy-cc1b1911/seal/read_sort.xml

  Galaxy tool definition that runs "seal read_sort" through the hadoop_galaxy
  launcher.  Input and output are pathset datasets; the reference is chosen
  from the bwa_0510_indexes data table and its ".ann" annotations file is
  handed to read_sort (absolute paths are first turned into file:// URIs).
-->
<tool id="seal_read_sort" name="ReadSort" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">
  <description>Sort reads with Hadoop</description>

  <requirements>
    <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.3">hadoop-galaxy</requirement>
  </requirements>

  <command>
    #set $ref_path = 'file://' + $reference.fields.path if $reference.fields.path.startswith('/') else $reference.fields.path
    hadoop_galaxy
    --input $input_data
    --output $output
    --executable seal
    read_sort
    --annotations ${ref_path}.ann
    --num-reducers $num_reducers
  </command>

  <inputs>
    <param name="input_data" type="data" format="pathset" label="Input data" />

    <param name="reference" type="select" label="Reference (should be the same one used for alignment)">
      <options from_data_table="bwa_0510_indexes" />
    </param>

    <param name="num_reducers" label="Number of reduce tasks" type="integer" value="90" min="1" />
  </inputs>

  <outputs>
    <data name="output" format="pathset" />
  </outputs>

  <!-- Any non-zero exit code from the command is treated as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
ReadSort is a Hadoop-based program for sorting reads by alignment position.
For the full help see the `manual <http://biodoop-seal.sourceforge.net/read_sort_index.html>`_.
  </help>

</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/seal/recab_table.xml Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,122 @@ + +<!-- + Copyright (C) 2011-2014 CRS4. + + This file is part of Seal. + + Seal is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + Seal is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with Seal. If not, see <http://www.gnu.org/licenses/>. +--> + + +<tool id="seal_recab_table" name="Recab Table" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4"> + <description>Calculate a base quality recalibration table on Hadoop.</description> + <requirements> + <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement> + <requirement type="package" version="0.11">pydoop</requirement> + <requirement type="package" version="0.1.3">hadoop-galaxy</requirement> + </requirements> + + <command interpreter="python"> + recab_table_galaxy.py + $input_data + $output1 + + #if $dbsnp.db_source == "history": + $dbsnp.ownFile + #else: + ${dbsnp.built-inFile.fields.path} + #end if + + $num_reducers + + #if $default_rg: + -D seal.recab.rg-covariate.default-rg=$default_rg + #end if + + #if $smoothing: + -D seal.recab.smoothing=$smoothing + #end if + + #if $max_qscore: + -D seal.recab.max-qscore=$max_qscore + #end if + </command> + + <inputs> + <param name="input_data" type="data" format="pathset" label="Input data" /> + <param name="input_format" type="select" label="Input format" default="sam"> + <option value="sam" /> + <option value="bam" /> + </param> + + <conditional name="dbsnp"> 
+ <param name="db_source" type="select" label="Select database of known variation sites"> + <option value="built-in">Select a different built-in database</option> + <option value="history">Use a database (vcf format) from my history</option> + </param> + + <when value="built-in"> + <param name="built-inFile" type="select" label="Select a built-in database"> + <options from_data_table="variant_tables"/> + </param> + </when> + + <when value="history"> + <param name="ownFile" type="data" format="vcf" label="Select a database from history"/> + </when> + </conditional> + + + <param name="num_reducers" + label="Number of reduce tasks" + type="integer" + value="90" + min="1" + /> + + <!-- recab-specific parameters --> + <param name="default_rg" + label="Default read group" + type="text" + help="Read group to assign to mappings without an RG tag. This value is mandatory if your data includes mappings that do not have a read group tag (RG). Seal RecabTable property: seal.recab.rg-covariate.default-rg." + /> + <param name="smoothing" + label="Smoothing" + type="integer" + value="0" + help="Smoothing parameter for empirical quality calculation. Seal RecabTable property: seal.recab.smoothing." + min="0" + /> + <param name="max_qscore" + label="Max quality score" + type="integer" + value="40" + min="1" + help="Upper limit for the empirical quality scores. Seal RecabTable property: seal.recab.max-qscore." + /> + </inputs> + + <outputs> + <data name="output1" format="csv" /> + </outputs> + + <stdio> + <exit_code range="1:" level="fatal" /> + </stdio> + + <help> +RecabTable is a Hadoop program to calculate a table of base qualities for all values of a given set of factors. It computes a result equivalent to the GATK CountCovariatesWalker. +For the full help see the `manual <http://biodoop-seal.sourceforge.net/recab_table_index.html>`_. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/seal/recab_table_galaxy.py Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,116 @@ +#!/usr/bin/env python + +# Copyright (C) 2011-2014 CRS4. +# +# This file is part of Seal. +# +# Seal is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation, either version 3 of the License, or (at your option) +# any later version. +# +# Seal is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License along +# with Seal. If not, see <http://www.gnu.org/licenses/>. + + + +""" +Calls the Seal RecabTable tool. Then, it calls recab_table_fetch to +concatenate all the partial tables and create a single csv file. +""" + + +# parameters: +# INPUT_DATA +# OUTPUT +# VCF +# NUM_REDUCERS +# [OTHER] + +import os +import sys + +import hadoop_galaxy.pathset as pathset +import subprocess +import tempfile +import pydoop.hdfs as phdfs + +# XXX: add --append-python-path to the possible arguments? 
def usage_error(msg=None):
  """
  Report a command-line usage error and terminate.

  Writes `msg` (when given) and then the usage line to stderr, and exits
  the process with status 1.
  """
  if msg:
    sys.stderr.write("%s\n" % msg)
  sys.stderr.write("%s INPUT_DATA OUTPUT VCF NUM_REDUCERS [OTHER]\n" % os.path.basename(sys.argv[0]))
  sys.exit(1)


def run_recab(input_path, output_path, vcf, num_red, other_args):
  """
  Run `seal recab_table` through the `hadoop_galaxy` launcher.

  input_path:  value passed to hadoop_galaxy as --input
  output_path: value passed to hadoop_galaxy as --output
  vcf:         value passed to recab_table as --vcf-file
  num_red:     value passed to recab_table as --num-reducers
  other_args:  optional list of extra arguments appended verbatim

  Raises subprocess.CalledProcessError if the command exits with a
  non-zero status.
  """
  cmd = [
    'hadoop_galaxy',
    '--input', input_path,
    '--output', output_path,
    '--executable', 'seal',
    'recab_table',
    '--vcf-file', vcf,
    '--num-reducers', num_red
  ]

  if other_args:
    cmd.extend(other_args)

  # now execute the hadoop job
  subprocess.check_call(cmd)

def collect_table(pset, output_path):
  """
  Fetch the job result into the final output file.

  Runs `seal recab_table_fetch` on every path returned by pset.get_paths(),
  with `output_path` as the destination.

  Raises subprocess.CalledProcessError if the command exits with a
  non-zero status.
  """
  cmd = ['seal', 'recab_table_fetch']
  cmd.extend(pset.get_paths())
  cmd.append(output_path)
  try:
    # remove the file that galaxy creates.  recab_table_fetch refuses to
    # overwrite it
    os.unlink(output_path)
  except (IOError, OSError):
    # os.unlink reports failures as OSError (e.g. the file does not exist).
    # Removal is best effort: if the file is really still in the way,
    # recab_table_fetch fails below with its own error.
    pass
  subprocess.check_call(cmd)

def cleanup(out_pathset):
  """
  Delete every path in `out_pathset` with pydoop's hdfs.rmr.

  Best effort: a failure to delete one path is reported on stderr and the
  remaining paths are still processed.
  """
  for path in out_pathset:
    try:
      sys.stderr.write("Deleting output path %s\n" % path)
      phdfs.rmr(path)
    except Exception as e:
      sys.stderr.write("Error! %s\n" % str(e))

def main(args):
  """
  Entry point.  `args` is INPUT_DATA OUTPUT VCF NUM_REDUCERS [OTHER...].

  Runs recab_table with a temporary pathset file as its output, gathers
  the result into OUTPUT and finally deletes the job's output paths.
  """
  # OTHER is optional, so only the four positional arguments are mandatory
  if len(args) < 4:
    usage_error()

  input_data, final_output, vcf, num_reducers = args[:4]
  other = args[4:]

  # Stays None until the job has run and its output pathset has been read,
  # so that a failure in run_recab is not masked by an error in `finally`.
  out_paths = None

  # Create a temporary pathset to reference the recab_table
  # output directory.  The file is only read from here; the job writes to
  # it through its name.
  with tempfile.NamedTemporaryFile(mode='rb') as tmp_pathset_file:
    try:
      run_recab(input_data, tmp_pathset_file.name, vcf, num_reducers, other)
      tmp_pathset_file.seek(0)
      out_paths = pathset.FilePathset.from_file(tmp_pathset_file)
      collect_table(out_paths, final_output)
    finally:
      if out_paths is not None:
        cleanup(out_paths)

if __name__ == "__main__":
  main(sys.argv[1:])

# vim: et ai ts=2 sw=2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/seal/seqal.xml Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,83 @@ + +<!-- + Copyright (C) 2011-2014 CRS4. + + This file is part of Seal. + + Seal is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + Seal is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License along + with Seal. If not, see <http://www.gnu.org/licenses/>. +--> + + +<tool id="seal_seqal" name="Seqal" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4"> + <description>Map reads on Hadoop</description> + <requirements> + <requirement type="package" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4">seal</requirement> + <requirement type="package" version="0.11">pydoop</requirement> + <requirement type="package" version="0.1.3">hadoop-galaxy</requirement> + </requirements> + + <command> + hadoop_galaxy + --input $input_data + --output $output1 + --executable seal + seqal + #if $align_only.value: + --align-only --num-reducers 0 + #else + --num-reducers $align_only.num_reducers + #end if + --trimq $trimq + ${reference.fields.path} + </command> + + <inputs> + <param name="input_data" type="data" format="pathset" label="Input data" /> + + <param name="reference" type="select" label="Select a built-in reference index archive"> + <options from_data_table="seqal_indexes"> + </options> + </param> + + <param name="trimq" type="integer" min="0" value="0" label="trim quality, like BWA’s -q argument" /> + + <conditional name="align_only"> + <param name="value" type="boolean" default="false" label="Align only (don't identify 
duplicates)" /> + <when value="false"> + <param name="num_reducers" + label="Number of reduce tasks" + type="integer" + value="90" + min="1" + /> + </when> + </conditional> + </inputs> + + <outputs> + <data name="output1" format="pathset" /> + </outputs> + + <stdio> + <exit_code range="1:" level="fatal" /> + </stdio> + + <help> + Seqal is a distributed short read mapping and duplicate removal tool. It + implements a distributed version of the BWA aligner, and adds a duplicate + read identification feature using the same criteria as the Picard + MarkDuplicates command. For a full description see the `manual + <http://biodoop-seal.sourceforge.net/seqal_index.html>`_. + </help> +</tool>
#!/usr/bin/env python
# seal/split_demux_output.py

# Copyright (C) 2011-2014 CRS4.
#
# This file is part of Seal.
#
# Seal is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option)
# any later version.
#
# Seal is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with Seal.  If not, see <http://www.gnu.org/licenses/>.

"""
Split the output of a demux run into one pathset file per sample.

Usage: split_demux_output.py OUTPUT_ID DEMUX_OUTPUT_PATHSET NEW_FILE_DIR

The demux output pathset must contain exactly one path (the demux output
directory).  For every project/sample directory found underneath it, and
for the 'unknown' directory if present, a new pathset file referring to
that directory is written into NEW_FILE_DIR.

Set the DEBUG environment variable to enable debug logging.
"""

import logging
import os
import sys

import pydoop.hdfs as phdfs

from hadoop_galaxy.pathset import FilePathset

Debug = os.environ.get('DEBUG', None)
logging.basicConfig(level=logging.DEBUG if Debug else logging.INFO)

def usage_error(msg=None):
  """
  Write `msg` (when given) and the usage line to stderr, then exit with
  status 1.
  """
  if msg:
    sys.stderr.write("%s\n" % msg)
  sys.stderr.write("Usage:  %s OUTPUT_ID DEMUX_OUTPUT_PATHSET NEW_FILE_DIR\n" % os.path.basename(sys.argv[0]))
  sys.exit(1)


class PathsetWriter(object):
  """
  Writes pathset files named so that Galaxy picks them up as additional
  outputs of the dataset identified by `output_id`.
  """
  # The format is dictated by the Galaxy documentation for tools that produce a variable
  # number of output files:  http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple%20Output%20Files
  # We fix the file_type to 'pathset'.
  Galaxy_output_name_template = "primary_%s_%s_visible_pathset"

  def __init__(self, output_dir, output_id, data_type):
    """
    output_dir: directory in which the new pathset files are created
    output_id:  first field substituted into the output file name
    data_type:  datatype set on every pathset that is written
    """
    self.output_dir = output_dir
    self.output_id = output_id
    self.data_type = data_type

  def write_pathset(self, dataset_path, name):
    """
    dataset_path: the path of the dataset to which the new pathset needs to refer
    name: name of dataset to appear in Galaxy

    Returns self, to allow chaining.  Raises RuntimeError if `name` is blank.
    """
    if not name:
      raise RuntimeError("Blank dataset name")
    sanitized_name = name.replace('_', '-') # replace _ with - or galaxy won't like the name
    opathset = FilePathset(dataset_path)
    opathset.set_datatype(self.data_type)
    opath = os.path.join(self.output_dir, self.Galaxy_output_name_template % (self.output_id, sanitized_name))
    logging.debug("writing dataset path %s to pathset file %s", dataset_path, opath)
    with open(opath, 'w') as f:
      opathset.write(f)
    return self # to allow chaining


def _is_visible(path):
  """
  True unless the last component of `path` is empty or starts with '_' or
  '.' (hadoop and regular hidden files).  A trailing '/' is ignored.
  """
  name = os.path.basename(path.rstrip('/'))
  return bool(name) and name[0] not in ('_', '.')


def _complete_sample_name(project_sample_path):
  """
  Build "project.sample" from the last two components of the path.
  """
  # The listed paths are '/'-separated whatever the local os.path.sep is;
  # a trailing '/' would otherwise yield an empty sample component.
  components = project_sample_path.rstrip('/').split('/')
  return "%s.%s" % tuple(components[-2:])


def main():
  """
  Entry point: reads OUTPUT_ID, DEMUX_OUTPUT_PATHSET and NEW_FILE_DIR from
  sys.argv and writes one pathset file per sample directory.

  Raises RuntimeError if the input pathset does not contain exactly one
  path.
  """
  if len(sys.argv) != 4:
    usage_error("Wrong number of arguments")

  output_id, demux_data, dest_dir = sys.argv[1:]
  logging.debug("input args: output_id, demux_data, dest_dir = %s", sys.argv[1:])

  ipathset = FilePathset.from_file(demux_data)
  logging.debug("input path set: %s", ipathset)

  writer = PathsetWriter(dest_dir, output_id, ipathset.datatype)

  # ipathset points to the output directory given to demux.  Inside it
  # we should find all the project/sample subdirectories, plus 'unknown' (if there
  # were any reads not attributable to a sample).  So, we list the output
  # dir and collect sample names and their paths.  In theory, the pathset
  # we receive as input should only contains the output from one demux; thus
  # a sample should only occur once.
  if len(ipathset) != 1:
    raise RuntimeError("Unexpected demux output pathset size of %d. Expected 1 (the demux output path)" % len(ipathset))

  # List the contents of the pathset.  ls produces absolute paths
  demux_output_dir = next(iter(ipathset))
  project_paths = [ p for p in phdfs.ls(demux_output_dir) if _is_visible(p) ]

  # Each project_path points to a directory containing the data from one project.
  # There may also be a directory 'unknown'
  for project_path in project_paths:
    if os.path.basename(project_path).lower() == 'unknown':
      writer.write_pathset(project_path, 'unknown')
    else:
      for project_sample_path in phdfs.ls(project_path):
        # take the last two elements of the path -- should be project, sample
        writer.write_pathset(project_sample_path, _complete_sample_name(project_sample_path))

if __name__ == '__main__':
  main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/seal_tool_conf.xml Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,14 @@ +<?xml version="1.0"?> + +<toolbox> + <section name="Seal" id="seal"> + <tool file="seal_galaxy/seal/bcl2qseq.xml" /> + <tool file="seal_galaxy/seal/demux.xml" /> + <tool file="seal_galaxy/seal/prq.xml" /> + <tool file="seal_galaxy/seal/seqal.xml" /> + <tool file="seal_galaxy/seal/read_sort.xml" /> + <tool file="seal_galaxy/seal/merge_alignments.xml" /> + <tool file="seal_galaxy/seal/recab_table.xml" /> + <tool file="seal_galaxy/seal/generate_sam_header.xml" /> + </section> +</toolbox>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/tool_data_table_conf.xml.sample Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,16 @@ +<tables> + <table name="bwa_0510_indexes" comment_char="#"> + <columns>name, value, path</columns> + <!--<file path="tool-data/bwa_0510_indices.loc" />--> + </table> + + <table name="seqal_indexes" comment_char="#"> + <columns>name, value, path</columns> + <!--<file path="tool-data/bwa_0510_indices.loc" />--> + </table> + + <table name="variant_tables" comment_char="#"> + <columns>name, value, path</columns> + <!--<file path="tool-data/bwa_0510_indices.loc" />--> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seal-galaxy-cc1b1911/tool_dependencies.xml Wed Oct 15 09:41:10 2014 -0400 @@ -0,0 +1,39 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="hadoop-galaxy" version="0.1.3"> + <repository changeset_revision="30bd2584b6a0" name="hadoop_galaxy" owner="crs4" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + + <package name="seal" version="13986416aa79561bd0102cb7ccc1e0668ac9f0a4"> + <install version="1.0"> + <actions> + <action type="shell_command">git clone https://github.com/crs4/seal.git</action> + <action type="shell_command">git checkout master</action> + <action type="shell_command">git reset --hard 13986416aa79561bd0102cb7ccc1e0668ac9f0a4</action> + <!--<action type="download_by_url">https://github.com/crs4/seal/archive/0.4.0-rc2.tar.gz</action>--> + <action type="set_environment_for_install"> + <environment_variable action="prepend_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> + </action> + <action type="make_directory">$INSTALL_DIR/lib/python</action> + <action type="shell_command">python setup.py build_hadoop_bam</action> + <action type="shell_command">python setup.py install --prefix=$INSTALL_DIR --install-lib=$INSTALL_DIR/lib/python</action> + <action type="set_environment"> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> + <environment_variable action="prepend_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> + </action> + </actions> + </install> + <readme> +This package has a number of dependencies that need to be installed before it: + +* Pydoop needs to be installed (it will be pulled down as a dependency; see +that package's instructions for its own installation pointers) + +* protobuf-python + +* JDK and Ant (Ant version 1.7 or later) + +Please see http://biodoop-seal.sourceforge.net/installation_dependencies.html for more details. 
+ </readme> + </package> +</tool_dependency>