# HG changeset patch # User crs4 # Date 1378918281 14400 # Node ID 0ec408bcfc806333d5b96826b6dcaea9166b0ab2 Uploaded diff -r 000000000000 -r 0ec408bcfc80 COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Wed Sep 11 12:51:21 2013 -0400 @@ -0,0 +1,24 @@ +Copyright © 2012-2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Massimiliano Orsini +Gianmauro Cuccuru +Nicola Soranzo + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff -r 000000000000 -r 0ec408bcfc80 ssake.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ssake.py Wed Sep 11 12:51:21 2013 -0400 @@ -0,0 +1,162 @@ +# -*- coding: utf-8 -*- +""" +SSAKE wrapper +""" + +import logging +import optparse +import os +import shutil +import subprocess +import tempfile + +def execute(cmd): + """ """ + subprocess.check_call(args=cmd, stdout=open(os.devnull, 'w'), shell=True) + + +def which(name, flags=os.X_OK): + """ + Search PATH for executable files with the given name. + """ + result = [] + exts = filter(None, os.environ.get('PATHEXT', '').split(os.pathsep)) + path = os.environ.get('PATH', None) + if path is None: + return [] + for p in os.environ.get('PATH', '').split(os.pathsep): + p = os.path.join(p, str(name)) + if os.access(p, flags): + result.append(p) + for e in exts: + pext = p + e + if os.access(pext, flags): + result.append(pext) + return result + + +class SSAKE: + def __init__(self, logger, options): + self.logger = logger + self.executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl') + self.logger.debug(which(self.executables[0])) + self.logger.debug(which(self.executables[1])) + self.logger.debug(which(self.executables[2])) + self.logger.debug('Creating temp dir') + self.wd = tempfile.mkdtemp() + + self.kind_of_reads = int(options.kind_of_reads) + if not (self.kind_of_reads): + self.infile = options.if_unpaired + self.paired = 0 + else: + self.infile_r1 = options.if_paired_r1 + self.infile_r2 = options.if_paired_r2 + self.paired = 1 + self.insert_size = options.insert_size + self.minnumlinks = options.minnumlinks + self.error = options.error + self.maxlinkratio = options.maxlinkratio + self.minoverlap = options.minoverlap + self.mindepthofcoverage = options.mindepthofcoverage + self.minoverlappingbases = options.minoverlappingbases + self.mincall = options.mincall + self.baseratio = options.baseratio + self.ignore_header = options.ignore_header + self.prefix = options.prefix + self.contigs = options.contigs + self.log = options.logfile + self.short = options.short + self.singlets = options.singlets + if options.seeds_file: + self.seeds_file = options.seeds_file + + def run(self): + """ """ + os.chdir(self.wd) + seeds = '' + if hasattr(self, 'seeds_file'): + seeds = " -s %s" % self.seeds_file + if self.kind_of_reads == 1: + cmd = "%s %s %s %d" % ( + self.executables[1], self.infile_r1, self.infile_r2, + self.insert_size) + self.logger.info("Preparing data") + execute(cmd) + paired_file = "%s/paired.fa" % self.wd + command = "%s -f %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap) + elif self.kind_of_reads == 2: + cmd = "%s %s %s %d" % ( + self.executables[2], self.infile_r1, self.infile_r2, + self.insert_size) + self.logger.info("Preparing data") + execute(cmd) + paired_file = "%s/paired.fa" % self.wd + unpaired_file = "%s/unpaired.fa" % self.wd + command = "%s -f %s -g %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, unpaired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap) + else: + command = "%s -f %s" % (self.executables[0], self.infile) + command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (seeds, self.mindepthofcoverage, self.minoverlappingbases, self.mincall, self.baseratio, self.ignore_header, self.prefix, self.paired) + self.logger.debug(command) + self.logger.info("Executing SSAKE") + execute(command) + + with open("%s.log" % os.path.join(self.wd, self.prefix), 'rb') as ssake_log_file: + self.logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()])) + self.logger.info("Moving result files") + shutil.move("%s.contigs" % os.path.join(self.wd, self.prefix), self.contigs) + shutil.move("%s.short" % os.path.join(self.wd, self.prefix), self.short) + shutil.move("%s.singlets" % os.path.join(self.wd, self.prefix), self.singlets) + + def __del__(self): + shutil.rmtree(self.wd) + + +LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' +LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' +LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + + +def __main__(): + """ main function """ + parser = optparse.OptionParser() + parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name') + parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name') + parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name') + parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name') + parser.add_option('-w', dest='mindepthofcoverage', type='int', help='minimum depth of coverage allowed for contigs') + parser.add_option('-m', dest='minoverlappingbases', type='int', default=20, help='Minimum number of overlapping bases with the seed/contig during overhang consensus build up (default -m 20)') + parser.add_option('-o', dest='mincall', type='int', default=2, help='mincall -o ') + parser.add_option('-r', dest='baseratio', type='float', default=0.7, help='baseratio -r') + parser.add_option('-k', dest='minnumlinks', type='int', default=4, help='Minimum number of links (read pairs) to compute scaffold -k') + parser.add_option('-e', dest='error', type='float', default=0.75, help='Error (%) allowed on mean distance -e') + parser.add_option('-a', dest='maxlinkratio', type='float', default=0.5, help='Maximum link ratio between two best contig pairs -a') + parser.add_option('-x', dest='minoverlap', type='int', default=20, help='Minimum overlap required between contigs to merge adjacent contigs in a scaffold -x') + parser.add_option('--ignore_header', dest='ignore_header', choices=['0', '1'], default='1', help='Ignore read name/header *will use less RAM if set to 1* -h') + parser.add_option('--kind_of_reads', dest='kind_of_reads', choices=['0', '1', '2'], help='Kind of reads (-p)') + parser.add_option('--iz', dest='insert_size', type='int', help='Library insert size') + parser.add_option('--prefix', dest='prefix', default='ssake_pre', help='prefix') + parser.add_option('--out1', dest='contigs', help='contig file') + parser.add_option('--out2', dest='short', help='short file') + parser.add_option('--out3', dest='singlets', help='singlets file') + parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)') + parser.add_option('--logfile', help='log file (default=stderr)') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + log_level = getattr(logging, options.loglevel) + kwargs = {'format': LOG_FORMAT, + 'datefmt': LOG_DATEFMT, + 'level': log_level} + if options.logfile: + kwargs['filename'] = options.logfile + logging.basicConfig(**kwargs) + logger = logging.getLogger('SSAKE scaffold assembly') + + S = SSAKE(logger, options) + S.run() + return + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 0ec408bcfc80 ssake.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ssake.xml Wed Sep 11 12:51:21 2013 -0400 @@ -0,0 +1,99 @@ + + short DNA sequences assembler + + ssake + + + ssake.py + #if $kind_of_reads.kind_of_reads_select == '0' + --if_unpaired $infile + #else + --if_paired_r1 $infile_r1 + --if_paired_r2 $infile_r2 + --iz ${kind_of_reads.insert_size} + -k ${kind_of_reads.minnumlinks} + -e ${kind_of_reads.error} + -a ${kind_of_reads.maxlinkratio} + -x ${kind_of_reads.minoverlap} + #end if + #if $seeds + -s $seeds + #end if + -w $mindepthofcoverage + -m $minoverlap + -o $mincall + -r $baseratio + --ignore_header 1 + --kind_of_reads ${kind_of_reads.kind_of_reads_select} + --out1 $contig + --out2 $short + --out3 $singlets + --logfile $log + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +SSAKE is a genomics application for de novo assembly of millions of very short DNA sequences. +It is an easy-to-use, robust, reliable and tractable clustering algorithm for very short sequence reads, such as those generated by Illumina Ltd. + +**License and citation** + +This Galaxy tool is Copyright © 2012-2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `SSAKE`_, which is licensed separately. Please cite |Warren2007|_. + +.. _SSAKE: http://www.bcgsc.ca/platform/bioinfo/software/ssake/ +.. |Warren2007| replace:: Warren RL, Sutton GG, Jones SJM, Holt RA. 2007. Assembling millions of short DNA sequences using SSAKE. Bioinformatics. 23(4):500-501 +.. _Warren2007: http://bioinformatics.oxfordjournals.org/content/23/4/500 + + diff -r 000000000000 -r 0ec408bcfc80 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Wed Sep 11 12:51:21 2013 -0400 @@ -0,0 +1,22 @@ + + + + + + http://www.bcgsc.ca/platform/bioinfo/software/ssake/releases/3.8/ssake_v3-8-tar.gz + + sed -i -e 's/require "getopts.pl"/use Getopt::Std/' -e 's/&Getopts/getopts/' SSAKE tools/TQSfastq.pl + + . + $INSTALL_DIR + + + $INSTALL_DIR + + + $INSTALL_DIR/tools + + + + +