annotate sspace.py @ 0:b1be94418db9 draft

Uploaded
author crs4
date Tue, 17 Sep 2013 09:41:18 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b1be94418db9 Uploaded
crs4
parents:
diff changeset
1 # -*- coding: utf-8 -*-
b1be94418db9 Uploaded
crs4
parents:
diff changeset
2 """
b1be94418db9 Uploaded
crs4
parents:
diff changeset
3 SSPACE wrapper
b1be94418db9 Uploaded
crs4
parents:
diff changeset
4 """
b1be94418db9 Uploaded
crs4
parents:
diff changeset
5
b1be94418db9 Uploaded
crs4
parents:
diff changeset
6 import logging
b1be94418db9 Uploaded
crs4
parents:
diff changeset
7 import optparse
b1be94418db9 Uploaded
crs4
parents:
diff changeset
8 import os
b1be94418db9 Uploaded
crs4
parents:
diff changeset
9 import shutil
b1be94418db9 Uploaded
crs4
parents:
diff changeset
10 import subprocess
b1be94418db9 Uploaded
crs4
parents:
diff changeset
11 import tempfile
b1be94418db9 Uploaded
crs4
parents:
diff changeset
12
b1be94418db9 Uploaded
crs4
parents:
diff changeset
13
b1be94418db9 Uploaded
crs4
parents:
diff changeset
def which(name, flags=os.X_OK):
    """Search PATH for files called *name* that are accessible with *flags*.

    For every directory listed in the ``PATH`` environment variable the plain
    name is tried first, followed by the name with each non-empty extension
    from ``PATHEXT`` appended.

    :param name: file name to look for (converted with ``str``).
    :param flags: mode passed to ``os.access``; defaults to ``os.X_OK``.
    :returns: list of matching paths, in PATH order; an empty list when
        ``PATH`` is not set or nothing matches.
    """
    path = os.environ.get('PATH')
    if path is None:
        return []
    # Build a real list: a lazy ``filter`` object (Python 3) would be used up
    # after the first PATH entry, so later directories would never be probed
    # with the PATHEXT extensions.
    exts = [e for e in os.environ.get('PATHEXT', '').split(os.pathsep) if e]
    result = []
    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, str(name))
        if os.access(candidate, flags):
            result.append(candidate)
        for ext in exts:
            candidate_ext = candidate + ext
            if os.access(candidate_ext, flags):
                result.append(candidate_ext)
    return result
b1be94418db9 Uploaded
crs4
parents:
diff changeset
30
b1be94418db9 Uploaded
crs4
parents:
diff changeset
31
b1be94418db9 Uploaded
crs4
parents:
diff changeset
# Logging configuration handed to ``logging.basicConfig`` by ``__main__``.
# Record layout: timestamp | level name (left-aligned, 8 columns) | message.
LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
# ``strftime`` pattern used to render ``%(asctime)s``.
LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
# Values accepted by ``--loglevel``; each is looked up as an attribute of the
# ``logging`` module.
LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
b1be94418db9 Uploaded
crs4
parents:
diff changeset
35
b1be94418db9 Uploaded
crs4
parents:
diff changeset
36
b1be94418db9 Uploaded
crs4
parents:
diff changeset
def _build_option_parser():
    """Return the ``optparse`` parser describing the wrapper's command line."""
    parser = optparse.OptionParser()
    parser.add_option('-c', dest='contigs', help='contigs mandatory (-s)')
    parser.add_option('--r1', dest='reads1', help='')
    parser.add_option('--r2', dest='reads2', help='')
    parser.add_option('-i', dest='insert', type='int', help='')
    parser.add_option('-e', dest='error', type='float', help='')
    parser.add_option('-o', dest='orientation', choices=['FF', 'FR', 'RF', 'RR'], help='')
    parser.add_option('-x', action='store_true', dest='extension', help='whether to extend the contigs (-x)')
    parser.add_option('--minoverlap', dest='minoverlap', type='int', help='minoverlap (-m)')
    parser.add_option('--numofreads', dest='numofreads', type='int', help='num of reads to call an extension (-o)')
    parser.add_option('-t', dest='max_trim', type='int', help='maximum number of bases to trim on the contig end when all possibilities have been exhausted for an extension (-t)')
    parser.add_option('-u', dest='unpaired', help='unpaired reads (-u, optional)')
    parser.add_option('-r', dest='min_base_ratio', type='float', help='minimum base ratio used to accept a overhang consensus base (-r)')
    parser.add_option('--minlink', dest='minlink', type='int', help='min link (-k)')
    parser.add_option('--maxratio', dest='maxratio', type='float', help='max ratio (-a)')
    parser.add_option('--contigoverlap', dest='contigoverlap', type='int', help='contigoverlap (-n)')
    parser.add_option('--mincontig', dest='mincontig', type='int', help='mincontig (-z)')
    parser.add_option('-T', dest='n_threads', type='int', help='number of threads to use in Bowtie (-T)')
    parser.add_option('-p', dest='prefix', default='sspace_pre', help='prefix (-b)')
    parser.add_option('--lib', dest='libraryname', default='galx', help='libraryfile')
    parser.add_option('--fe', dest='finalevidence', help='')
    parser.add_option('--fs', dest='finalscaffolds', help='')
    parser.add_option('--lg', dest='logfile', help='')
    parser.add_option('--summ', dest='summaryfile', help='')
    # The help text used to claim INFO although the effective default is DEBUG.
    parser.add_option('--loglevel', choices=LOG_LEVELS, help='logging level (default: DEBUG)', default='DEBUG')
    return parser


def _build_sspace_command(exe_path, libraryname, contigs, unpaired, options):
    """Return the SSPACE invocation as an argument list (no shell involved)."""
    command = ['perl', exe_path, '-l', libraryname, '-s', contigs]
    if options.extension:
        command.extend(['-x', '1'])
    # Optional tuning switches are forwarded only when given on the command
    # line; the order matches the SSPACE flags documented in the option help.
    before_unpaired = (('-m', options.minoverlap),
                       ('-o', options.numofreads),
                       ('-t', options.max_trim))
    after_unpaired = (('-r', options.min_base_ratio),
                      ('-k', options.minlink),
                      ('-a', options.maxratio),
                      ('-n', options.contigoverlap),
                      ('-z', options.mincontig),
                      ('-T', options.n_threads))
    for flag, value in before_unpaired:
        if value is not None:
            command.extend([flag, str(value)])
    if unpaired:
        command.extend(['-u', unpaired])
    for flag, value in after_unpaired:
        if value is not None:
            command.extend([flag, str(value)])
    command.extend(['-b', options.prefix])
    return command


def __main__():
    """Run SSPACE on one paired library and collect its result files.

    Parses the command line, writes the one-line SSPACE library file, runs
    ``SSPACE_Basic_v2.0.pl`` (located on PATH) through ``perl`` inside a
    temporary working directory, copies SSPACE's own log into the wrapper log
    and moves the evidence, scaffolds and summary files to the destinations
    given with ``--fe``, ``--fs`` and ``--summ``. The temporary directory is
    removed whether or not the run succeeds.

    Exits through ``parser.error`` when positional arguments are given or a
    mandatory option is missing, raises ``SystemExit`` when the SSPACE script
    cannot be found, and lets ``subprocess.CalledProcessError`` propagate when
    SSPACE returns a non-zero status.
    """
    parser = _build_option_parser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')

    # Fail early with a usage message instead of a TypeError (or a literal
    # "None" in the library file) further down.
    mandatory = (('-c', options.contigs),
                 ('--r1', options.reads1),
                 ('--r2', options.reads2),
                 ('-i', options.insert),
                 ('-e', options.error),
                 ('-o', options.orientation))
    for flag, value in mandatory:
        if value is None:
            parser.error('Missing mandatory option %s' % flag)

    log_level = getattr(logging, options.loglevel)
    kwargs = {'format': LOG_FORMAT,
              'datefmt': LOG_DATEFMT,
              'level': log_level}
    if options.logfile:
        kwargs['filename'] = options.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger('SSPACE scaffold assembly')

    # SSPACE runs inside a temporary directory, so every path it (or the
    # result-moving code below) touches must not depend on the current
    # working directory.
    contigs = os.path.abspath(options.contigs)
    reads1 = os.path.abspath(options.reads1)
    reads2 = os.path.abspath(options.reads2)
    unpaired = os.path.abspath(options.unpaired) if options.unpaired else ''
    libraryname = os.path.abspath(options.libraryname)
    prefix = options.prefix
    exe_name = "SSPACE_Basic_v2.0.pl"

    # The script is handed to perl, so it only needs to be readable.
    candidates = which(exe_name, os.R_OK)
    if not candidates:
        logger.critical("%s not found in PATH", exe_name)
        raise SystemExit("%s not found in PATH" % exe_name)
    # Honour PATH precedence: the first match wins.
    exe_path = candidates[0]

    # Set library
    outline = 'lib1 %s %s %d %s %s\n' % (reads1, reads2, options.insert, options.error, options.orientation)
    with open(libraryname, 'w') as out:
        out.write(outline)

    command = _build_sspace_command(exe_path, libraryname, contigs, unpaired, options)

    logger.debug('Creating temp dir')
    wd = tempfile.mkdtemp()
    try:
        logger.info("SSPACE running")
        logger.debug("executing %s", ' '.join(command))
        # ``cwd`` keeps this process out of the directory removed below.
        subprocess.check_call(command, cwd=wd)

        out_base = os.path.join(wd, prefix)
        with open("%s.logfile.txt" % out_base, 'rb') as sspace_log_file:
            # Decode explicitly so bytes and text are never mixed and odd
            # bytes in the tool's log cannot abort the wrapper.
            sspace_log = sspace_log_file.read().decode('utf-8', 'replace')
        logger.info("Log from SSPACE\n%s", sspace_log)

        logger.info("Moving result files")
        results = (("%s.final.evidence" % out_base, options.finalevidence),
                   ("%s.final.scaffolds.fasta" % out_base, options.finalscaffolds),
                   ("%s.summaryfile.txt" % out_base, options.summaryfile))
        for produced, destination in results:
            # A result whose destination was not requested is discarded
            # together with the temporary directory.
            if destination:
                shutil.move(produced, os.path.abspath(destination))
    finally:
        logger.debug("Removing temp dir: %s", wd)
        shutil.rmtree(wd)
b1be94418db9 Uploaded
crs4
parents:
diff changeset
126
b1be94418db9 Uploaded
crs4
parents:
diff changeset
127
b1be94418db9 Uploaded
crs4
parents:
diff changeset
# Run the wrapper only when the file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()