Mercurial > repos > crs4 > ssake
comparison ssake.py @ 0:0ec408bcfc80 draft
Uploaded
| author | crs4 |
|---|---|
| date | Wed, 11 Sep 2013 12:51:21 -0400 |
| parents | |
| children | 386166019772 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0ec408bcfc80 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 """ | |
| 3 SSAKE wrapper | |
| 4 """ | |
| 5 | |
| 6 import logging | |
| 7 import optparse | |
| 8 import os | |
| 9 import shutil | |
| 10 import subprocess | |
| 11 import tempfile | |
| 12 | |
| 13 def execute(cmd): | |
| 14 """ """ | |
| 15 subprocess.check_call(args=cmd, stdout=open(os.devnull, 'w'), shell=True) | |
| 16 | |
| 17 | |
| 18 def which(name, flags=os.X_OK): | |
| 19 """ | |
| 20 Search PATH for executable files with the given name. | |
| 21 """ | |
| 22 result = [] | |
| 23 exts = filter(None, os.environ.get('PATHEXT', '').split(os.pathsep)) | |
| 24 path = os.environ.get('PATH', None) | |
| 25 if path is None: | |
| 26 return [] | |
| 27 for p in os.environ.get('PATH', '').split(os.pathsep): | |
| 28 p = os.path.join(p, str(name)) | |
| 29 if os.access(p, flags): | |
| 30 result.append(p) | |
| 31 for e in exts: | |
| 32 pext = p + e | |
| 33 if os.access(pext, flags): | |
| 34 result.append(pext) | |
| 35 return result | |
| 36 | |
| 37 | |
| 38 class SSAKE: | |
| 39 def __init__(self, logger, options): | |
| 40 self.logger = logger | |
| 41 self.executables = ('SSAKE', 'makePairedOutput2EQUALfiles.pl', 'makePairedOutput2UNEQUALfiles.pl') | |
| 42 self.logger.debug(which(self.executables[0])) | |
| 43 self.logger.debug(which(self.executables[1])) | |
| 44 self.logger.debug(which(self.executables[2])) | |
| 45 self.logger.debug('Creating temp dir') | |
| 46 self.wd = tempfile.mkdtemp() | |
| 47 | |
| 48 self.kind_of_reads = int(options.kind_of_reads) | |
| 49 if not (self.kind_of_reads): | |
| 50 self.infile = options.if_unpaired | |
| 51 self.paired = 0 | |
| 52 else: | |
| 53 self.infile_r1 = options.if_paired_r1 | |
| 54 self.infile_r2 = options.if_paired_r2 | |
| 55 self.paired = 1 | |
| 56 self.insert_size = options.insert_size | |
| 57 self.minnumlinks = options.minnumlinks | |
| 58 self.error = options.error | |
| 59 self.maxlinkratio = options.maxlinkratio | |
| 60 self.minoverlap = options.minoverlap | |
| 61 self.mindepthofcoverage = options.mindepthofcoverage | |
| 62 self.minoverlappingbases = options.minoverlappingbases | |
| 63 self.mincall = options.mincall | |
| 64 self.baseratio = options.baseratio | |
| 65 self.ignore_header = options.ignore_header | |
| 66 self.prefix = options.prefix | |
| 67 self.contigs = options.contigs | |
| 68 self.log = options.logfile | |
| 69 self.short = options.short | |
| 70 self.singlets = options.singlets | |
| 71 if options.seeds_file: | |
| 72 self.seeds_file = options.seeds_file | |
| 73 | |
| 74 def run(self): | |
| 75 """ """ | |
| 76 os.chdir(self.wd) | |
| 77 seeds = '' | |
| 78 if hasattr(self, 'seeds_file'): | |
| 79 seeds = " -s %s" % self.seeds_file | |
| 80 if self.kind_of_reads == 1: | |
| 81 cmd = "%s %s %s %d" % ( | |
| 82 self.executables[1], self.infile_r1, self.infile_r2, | |
| 83 self.insert_size) | |
| 84 self.logger.info("Preparing data") | |
| 85 execute(cmd) | |
| 86 paired_file = "%s/paired.fa" % self.wd | |
| 87 command = "%s -f %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap) | |
| 88 elif self.kind_of_reads == 2: | |
| 89 cmd = "%s %s %s %d" % ( | |
| 90 self.executables[2], self.infile_r1, self.infile_r2, | |
| 91 self.insert_size) | |
| 92 self.logger.info("Preparing data") | |
| 93 execute(cmd) | |
| 94 paired_file = "%s/paired.fa" % self.wd | |
| 95 unpaired_file = "%s/unpaired.fa" % self.wd | |
| 96 command = "%s -f %s -g %s -k %d -e %s -a %s -x %d" % (self.executables[0], paired_file, unpaired_file, self.minnumlinks, self.error, self.maxlinkratio, self.minoverlap) | |
| 97 else: | |
| 98 command = "%s -f %s" % (self.executables[0], self.infile) | |
| 99 command += " %s -w %d -m %d -o %d -r %s -h %s -b %s -p %s" % (seeds, self.mindepthofcoverage, self.minoverlappingbases, self.mincall, self.baseratio, self.ignore_header, self.prefix, self.paired) | |
| 100 self.logger.debug(command) | |
| 101 self.logger.info("Executing SSAKE") | |
| 102 execute(command) | |
| 103 | |
| 104 with open("%s.log" % os.path.join(self.wd, self.prefix), 'rb') as ssake_log_file: | |
| 105 self.logger.info("\n".join(["Log from SSAKE", ssake_log_file.read()])) | |
| 106 self.logger.info("Moving result files") | |
| 107 shutil.move("%s.contigs" % os.path.join(self.wd, self.prefix), self.contigs) | |
| 108 shutil.move("%s.short" % os.path.join(self.wd, self.prefix), self.short) | |
| 109 shutil.move("%s.singlets" % os.path.join(self.wd, self.prefix), self.singlets) | |
| 110 | |
| 111 def __del__(self): | |
| 112 shutil.rmtree(self.wd) | |
| 113 | |
| 114 | |
| 115 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | |
| 116 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | |
| 117 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] | |
| 118 | |
| 119 | |
| 120 def __main__(): | |
| 121 """ main function """ | |
| 122 parser = optparse.OptionParser() | |
| 123 parser.add_option('--if_unpaired', dest='if_unpaired', help='Unpaired FASTA input file name') | |
| 124 parser.add_option('--if_paired_r1', dest='if_paired_r1', help='Paired FASTA reads 1 input file name') | |
| 125 parser.add_option('--if_paired_r2', dest='if_paired_r2', help='Paired FASTA reads 2 input file name') | |
| 126 parser.add_option('-s', dest='seeds_file', help='FASTA as seeds, input file name') | |
| 127 parser.add_option('-w', dest='mindepthofcoverage', type='int', help='minimum depth of coverage allowed for contigs') | |
| 128 parser.add_option('-m', dest='minoverlappingbases', type='int', default=20, help='Minimum number of overlapping bases with the seed/contig during overhang consensus build up (default -m 20)') | |
| 129 parser.add_option('-o', dest='mincall', type='int', default=2, help='mincall -o ') | |
| 130 parser.add_option('-r', dest='baseratio', type='float', default=0.7, help='baseratio -r') | |
| 131 parser.add_option('-k', dest='minnumlinks', type='int', default=4, help='Minimum number of links (read pairs) to compute scaffold -k') | |
| 132 parser.add_option('-e', dest='error', type='float', default=0.75, help='Error (%) allowed on mean distance -e') | |
| 133 parser.add_option('-a', dest='maxlinkratio', type='float', default=0.5, help='Maximum link ratio between two best contig pairs -a') | |
| 134 parser.add_option('-x', dest='minoverlap', type='int', default=20, help='Minimum overlap required between contigs to merge adjacent contigs in a scaffold -x') | |
| 135 parser.add_option('--ignore_header', dest='ignore_header', choices=['0', '1'], default='1', help='Ignore read name/header *will use less RAM if set to 1* -h') | |
| 136 parser.add_option('--kind_of_reads', dest='kind_of_reads', choices=['0', '1', '2'], help='Kind of reads (-p)') | |
| 137 parser.add_option('--iz', dest='insert_size', type='int', help='Library insert size') | |
| 138 parser.add_option('--prefix', dest='prefix', default='ssake_pre', help='prefix') | |
| 139 parser.add_option('--out1', dest='contigs', help='contig file') | |
| 140 parser.add_option('--out2', dest='short', help='short file') | |
| 141 parser.add_option('--out3', dest='singlets', help='singlets file') | |
| 142 parser.add_option('--loglevel', choices=LOG_LEVELS, default='INFO', help='logging level (default: INFO)') | |
| 143 parser.add_option('--logfile', help='log file (default=stderr)') | |
| 144 (options, args) = parser.parse_args() | |
| 145 if len(args) > 0: | |
| 146 parser.error('Wrong number of arguments') | |
| 147 | |
| 148 log_level = getattr(logging, options.loglevel) | |
| 149 kwargs = {'format': LOG_FORMAT, | |
| 150 'datefmt': LOG_DATEFMT, | |
| 151 'level': log_level} | |
| 152 if options.logfile: | |
| 153 kwargs['filename'] = options.logfile | |
| 154 logging.basicConfig(**kwargs) | |
| 155 logger = logging.getLogger('SSAKE scaffold assembly') | |
| 156 | |
| 157 S = SSAKE(logger, options) | |
| 158 S.run() | |
| 159 return | |
| 160 | |
| 161 if __name__ == "__main__": | |
| 162 __main__() |
