| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 """
 | 
| 
 | 
     3 Originally written by Kelly Vincent
 | 
| 
 | 
     4 pretty output and additional picard wrappers by Ross Lazarus for rgenetics
 | 
| 
 | 
     5 Runs all available wrapped Picard tools.
 | 
| 
 | 
     6 usage: picard_wrapper.py [options]
 | 
| 
 | 
     7 code Ross wrote licensed under the LGPL
 | 
| 
 | 
     8 see http://www.gnu.org/copyleft/lesser.html
 | 
| 
 | 
     9 """
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 import optparse, os, sys, subprocess, tempfile, shutil, time, logging
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
 | 
| 
 | 
    14 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 | 
| 
 | 
    15 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 | 
| 
 | 
    16 <head>
 | 
| 
 | 
    17 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 | 
| 
 | 
    18 <meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" />
 | 
| 
 | 
    19 <title></title>
 | 
| 
 | 
    20 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
 | 
| 
 | 
    21 </head>
 | 
| 
 | 
    22 <body>
 | 
| 
 | 
    23 <div class="document">
 | 
| 
 | 
    24 """
 | 
| 
 | 
    25 galhtmlattr = """Galaxy tool %s run at %s</b><br/>"""
 | 
| 
 | 
    26 galhtmlpostfix = """</div></body></html>\n"""
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 def stop_err( msg ):
 | 
| 
 | 
    30     sys.stderr.write( '%s\n' % msg )
 | 
| 
 | 
    31     sys.exit()
 | 
| 
 | 
    32     
 | 
| 
 | 
    33 
 | 
| 
 | 
    34 def timenow():
 | 
| 
 | 
    35     """return current time as a string
 | 
| 
 | 
    36     """
 | 
| 
 | 
    37     return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
 | 
| 
 | 
    38 
 | 
| 
 | 
    39 
 | 
| 
 | 
    40 class PicardBase():
 | 
| 
 | 
    41     """
 | 
| 
 | 
    42     simple base class with some utilities for Picard
 | 
| 
 | 
    43     adapted and merged with Kelly Vincent's code april 2011 Ross
 | 
| 
 | 
    44     lots of changes...
 | 
| 
 | 
    45     """
 | 
| 
 | 
    46     
 | 
| 
 | 
    47     def __init__(self, opts=None,arg0=None):
 | 
| 
 | 
    48         """ common stuff needed at init for a picard tool
 | 
| 
 | 
    49         """
 | 
| 
 | 
    50         assert opts <> None, 'PicardBase needs opts at init'
 | 
| 
 | 
    51         self.opts = opts
 | 
| 
 | 
    52         if self.opts.outdir == None:
 | 
| 
 | 
    53              self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir
 | 
| 
 | 
    54         assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in'
 | 
| 
 | 
    55         self.picname = self.baseName(opts.jar)
 | 
| 
 | 
    56         if self.picname.startswith('picard'):
 | 
| 
 | 
    57             self.picname = opts.picard_cmd # special case for some tools like replaceheader?
 | 
| 
 | 
    58         self.progname = self.baseName(arg0)
 | 
| 
 | 
    59         self.version = '0.002'
 | 
| 
 | 
    60         self.delme = [] # list of files to destroy
 | 
| 
 | 
    61         self.title = opts.title
 | 
| 
 | 
    62         self.inputfile = opts.input
 | 
| 
 | 
    63         try:
 | 
| 
 | 
    64             os.makedirs(opts.outdir)
 | 
| 
 | 
    65         except:
 | 
| 
 | 
    66             pass
 | 
| 
 | 
    67         try:
 | 
| 
 | 
    68             os.makedirs(opts.tmpdir)
 | 
| 
 | 
    69         except:
 | 
| 
 | 
    70             pass
 | 
| 
 | 
    71         self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname)
 | 
| 
 | 
    72         self.metricsOut =  os.path.join(opts.outdir,'%s.metrics.txt' % self.picname)
 | 
| 
 | 
    73         self.setLogging(logfname=self.log_filename)
 | 
| 
 | 
    74  
 | 
| 
 | 
    75     def baseName(self,name=None):
 | 
| 
 | 
    76         return os.path.splitext(os.path.basename(name))[0]
 | 
| 
 | 
    77 
 | 
| 
 | 
    78     def setLogging(self,logfname="picard_wrapper.log"):
 | 
| 
 | 
    79         """setup a logger
 | 
| 
 | 
    80         """
 | 
| 
 | 
    81         logging.basicConfig(level=logging.INFO,
 | 
| 
 | 
    82                     filename=logfname,
 | 
| 
 | 
    83                     filemode='a')
 | 
| 
 | 
    84 
 | 
| 
 | 
    85 
 | 
| 
 | 
    86     def readLarge(self,fname=None):
 | 
| 
 | 
    87         """ read a potentially huge file.
 | 
| 
 | 
    88         """
 | 
| 
 | 
    89         try:
 | 
| 
 | 
    90             # get stderr, allowing for case where it's very large
 | 
| 
 | 
    91             tmp = open( fname, 'rb' )
 | 
| 
 | 
    92             s = ''
 | 
| 
 | 
    93             buffsize = 1048576
 | 
| 
 | 
    94             try:
 | 
| 
 | 
    95                 while True:
 | 
| 
 | 
    96                     more = tmp.read( buffsize )
 | 
| 
 | 
    97                     if len(more) > 0:
 | 
| 
 | 
    98                         s += more
 | 
| 
 | 
    99                     else:
 | 
| 
 | 
   100                         break
 | 
| 
 | 
   101             except OverflowError:
 | 
| 
 | 
   102                 pass
 | 
| 
 | 
   103             tmp.close()
 | 
| 
 | 
   104         except Exception, e:
 | 
| 
 | 
   105             stop_err( 'Error : %s' % str( e ) )   
 | 
| 
 | 
   106         return s
 | 
| 
 | 
   107     
 | 
| 
 | 
   108     def runCL(self,cl=None,output_dir=None):
 | 
| 
 | 
   109         """ construct and run a command line
 | 
| 
 | 
   110         we have galaxy's temp path as opt.temp_dir so don't really need isolation
 | 
| 
 | 
   111         sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts
 | 
| 
 | 
   112         """
 | 
| 
 | 
   113         assert cl <> None, 'PicardBase runCL needs a command line as cl'
 | 
| 
 | 
   114         if output_dir == None:
 | 
| 
 | 
   115             output_dir = self.opts.outdir
 | 
| 
 | 
   116         if type(cl) == type([]):
 | 
| 
 | 
   117             cl = ' '.join(cl)
 | 
| 
 | 
   118         fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt')
 | 
| 
 | 
   119         tlf = open(templog,'wb')
 | 
| 
 | 
   120         fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt')
 | 
| 
 | 
   121         tef = open(temperr,'wb')
 | 
| 
 | 
   122         process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
 | 
| 
 | 
   123         rval = process.wait()
 | 
| 
 | 
   124         tlf.close()
 | 
| 
 | 
   125         tef.close()
 | 
| 
 | 
   126         stderrs = self.readLarge(temperr)
 | 
| 
 | 
   127         stdouts = self.readLarge(templog)        
 | 
| 
 | 
   128         if len(stderrs) > 0:
 | 
| 
 | 
   129             s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs)
 | 
| 
 | 
   130         else:
 | 
| 
 | 
   131             s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval)
 | 
| 
 | 
   132         logging.info(s)
 | 
| 
 | 
   133         os.unlink(templog) # always
 | 
| 
 | 
   134         os.unlink(temperr) # always
 | 
| 
 | 
   135         return s, stdouts # sometimes this is an output
 | 
| 
 | 
   136     
 | 
| 
 | 
   137     def runPic(self, jar, cl):
 | 
| 
 | 
   138         """
 | 
| 
 | 
   139         cl should be everything after the jar file name in the command
 | 
| 
 | 
   140         """
 | 
| 
 | 
   141         runme = ['java -Xmx%s' % self.opts.maxjheap]
 | 
| 
 | 
   142         runme.append('-jar %s' % jar)
 | 
| 
 | 
   143         runme += cl
 | 
| 
 | 
   144         s,stdout = self.runCL(cl=runme, output_dir=self.opts.outdir)
 | 
| 
 | 
   145         return stdout
 | 
| 
 | 
   146 
 | 
| 
 | 
   147     def samToBam(self,infile=None,outdir=None):
 | 
| 
 | 
   148         """
 | 
| 
 | 
   149         use samtools view to convert sam to bam
 | 
| 
 | 
   150         """
 | 
| 
 | 
   151         fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam')
 | 
| 
 | 
   152         cl = ['samtools view -h -b -S -o ',tempbam,infile]
 | 
| 
 | 
   153         tlog,stdouts = self.runCL(cl,outdir)
 | 
| 
 | 
   154         return tlog,tempbam
 | 
| 
 | 
   155 
 | 
| 
 | 
   156     #def bamToSam(self,infile=None,outdir=None):
 | 
| 
 | 
   157     #    """
 | 
| 
 | 
   158     #    use samtools view to convert bam to sam
 | 
| 
 | 
   159     #    """
 | 
| 
 | 
   160     #    fd,tempsam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.sam')
 | 
| 
 | 
   161     #    cl = ['samtools view -h -o ',tempsam,infile]
 | 
| 
 | 
   162     #    tlog,stdouts = self.runCL(cl,outdir)
 | 
| 
 | 
   163     #    return tlog,tempsam
 | 
| 
 | 
   164 
 | 
| 
 | 
   165     def sortSam(self, infile=None,outfile=None,outdir=None):
 | 
| 
 | 
   166         """
 | 
| 
 | 
   167         """
 | 
| 
 | 
   168         print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir)
 | 
| 
 | 
   169         cl = ['samtools sort',infile,outfile]
 | 
| 
 | 
   170         tlog,stdouts = self.runCL(cl,outdir)
 | 
| 
 | 
   171         return tlog
 | 
| 
 | 
   172 
 | 
| 
 | 
   173     def cleanup(self):
 | 
| 
 | 
   174         for fname in self.delme:
 | 
| 
 | 
   175             try:
 | 
| 
 | 
   176                 os.unlink(fname)
 | 
| 
 | 
   177             except:
 | 
| 
 | 
   178                 pass
 | 
| 
 | 
   179                     
 | 
| 
 | 
   180     def prettyPicout(self,transpose,maxrows):
 | 
| 
 | 
   181         """organize picard outpouts into a report html page
 | 
| 
 | 
   182         """
 | 
| 
 | 
   183         res = []
 | 
| 
 | 
   184         try:
 | 
| 
 | 
   185             r = open(self.metricsOut,'r').readlines()
 | 
| 
 | 
   186         except:
 | 
| 
 | 
   187             r = []        
 | 
| 
 | 
   188         if len(r) > 0:
 | 
| 
 | 
   189             res.append('<b>Picard on line resources</b><ul>\n')
 | 
| 
 | 
   190             res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
 | 
| 
 | 
   191             res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
 | 
| 
 | 
   192             if transpose:
 | 
| 
 | 
   193                 res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')       
 | 
| 
 | 
   194             else:
 | 
| 
 | 
   195                 res.append('<b>Picard output</b><hr/>\n')  
 | 
| 
 | 
   196             res.append('<table cellpadding="3" >\n')
 | 
| 
 | 
   197             dat = []
 | 
| 
 | 
   198             heads = []
 | 
| 
 | 
   199             lastr = len(r) - 1
 | 
| 
 | 
   200             # special case for estimate library complexity hist
 | 
| 
 | 
   201             thist = False
 | 
| 
 | 
   202             for i,row in enumerate(r):
 | 
| 
 | 
   203                 if row.strip() > '':
 | 
| 
 | 
   204                     srow = row.split('\t')
 | 
| 
 | 
   205                     if row.startswith('#'):
 | 
| 
 | 
   206                         heads.append(row.strip()) # want strings
 | 
| 
 | 
   207                     else:
 | 
| 
 | 
   208                         dat.append(srow) # want lists
 | 
| 
 | 
   209                     if row.startswith('## HISTOGRAM'):
 | 
| 
 | 
   210                         thist = True
 | 
| 
 | 
   211             if len(heads) > 0:
 | 
| 
 | 
   212                 hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)]
 | 
| 
 | 
   213                 res += hres
 | 
| 
 | 
   214                 heads = []
 | 
| 
 | 
   215             if len(dat) > 0:
 | 
| 
 | 
   216                 if transpose and not thist:
 | 
| 
 | 
   217                     tdat = map(None,*dat) # transpose an arbitrary list of lists
 | 
| 
 | 
   218                     tdat = ['<tr class="d%d"><td>%s</td><td>%s </td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)] 
 | 
| 
 | 
   219                 else:
 | 
| 
 | 
   220                     tdat = ['\t'.join(x).strip() for x in dat] # back to strings :(
 | 
| 
 | 
   221                     tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)]
 | 
| 
 | 
   222                 res += tdat
 | 
| 
 | 
   223                 dat = []
 | 
| 
 | 
   224             res.append('</table>\n')   
 | 
| 
 | 
   225         return res
 | 
| 
 | 
   226 
 | 
| 
 | 
   227     def fixPicardOutputs(self,transpose,maxloglines):
 | 
| 
 | 
   228         """
 | 
| 
 | 
   229         picard produces long hard to read tab header files
 | 
| 
 | 
   230         make them available but present them transposed for readability
 | 
| 
 | 
   231         """
 | 
| 
 | 
   232         logging.shutdown()
 | 
| 
 | 
   233         self.cleanup() # remove temp files stored in delme
 | 
| 
 | 
   234         rstyle="""<style type="text/css">
 | 
| 
 | 
   235         tr.d0 td {background-color: oldlace; color: black;}
 | 
| 
 | 
   236         tr.d1 td {background-color: aliceblue; color: black;}
 | 
| 
 | 
   237         </style>"""    
 | 
| 
 | 
   238         res = [rstyle,]
 | 
| 
 | 
   239         res.append(galhtmlprefix % self.progname)   
 | 
| 
 | 
   240         res.append(galhtmlattr % (self.picname,timenow()))
 | 
| 
 | 
   241         flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')] 
 | 
| 
 | 
   242         pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
 | 
| 
 | 
   243         if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
 | 
| 
 | 
   244             for p in pdflist:
 | 
| 
 | 
   245                 imghref = '%s.jpg' % os.path.splitext(p)[0] # removes .pdf
 | 
| 
 | 
   246                 res.append('<table cellpadding="10"><tr><td>\n')
 | 
| 
 | 
   247                 res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref)) 
 | 
| 
 | 
   248                 res.append('</tr></td></table>\n')   
 | 
| 
 | 
   249         if len(flist) > 0:
 | 
| 
 | 
   250             res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
 | 
| 
 | 
   251             res.append('<table>\n')
 | 
| 
 | 
   252             for i,f in enumerate(flist):
 | 
| 
 | 
   253                 fn = os.path.split(f)[-1]
 | 
| 
 | 
   254                 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
 | 
| 
 | 
   255             res.append('</table><p/>\n') 
 | 
| 
 | 
   256         pres = self.prettyPicout(transpose,maxloglines)
 | 
| 
 | 
   257         if len(pres) > 0:
 | 
| 
 | 
   258             res += pres
 | 
| 
 | 
   259         l = open(self.log_filename,'r').readlines()
 | 
| 
 | 
   260         llen = len(l)
 | 
| 
 | 
   261         if llen > 0: 
 | 
| 
 | 
   262             res.append('<b>Picard Tool Run Log</b><hr/>\n') 
 | 
| 
 | 
   263             rlog = ['<pre>',]
 | 
| 
 | 
   264             if llen > maxloglines:
 | 
| 
 | 
   265                 n = min(50,int(maxloglines/2))
 | 
| 
 | 
   266                 rlog += l[:n]
 | 
| 
 | 
   267                 rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines))
 | 
| 
 | 
   268                 rlog += l[-n:]
 | 
| 
 | 
   269             else:
 | 
| 
 | 
   270                 rlog += l
 | 
| 
 | 
   271             rlog.append('</pre>')
 | 
| 
 | 
   272             if llen > maxloglines:
 | 
| 
 | 
   273                 rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename))
 | 
| 
 | 
   274             res += rlog
 | 
| 
 | 
   275         else:
 | 
| 
 | 
   276             res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
 | 
| 
 | 
   277         res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n') 
 | 
| 
 | 
   278         res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')   
 | 
| 
 | 
   279         res.append(galhtmlpostfix) 
 | 
| 
 | 
   280         outf = open(self.opts.htmlout,'w')
 | 
| 
 | 
   281         outf.write(''.join(res))   
 | 
| 
 | 
   282         outf.write('\n')
 | 
| 
 | 
   283         outf.close()
 | 
| 
 | 
   284 
 | 
| 
 | 
   285     def makePicInterval(self,inbed=None,outf=None):
 | 
| 
 | 
   286         """
 | 
| 
 | 
   287         picard wants bait and target files to have the same header length as the incoming bam/sam 
 | 
| 
 | 
   288         a meaningful (ie accurate) representation will fail because of this - so this hack
 | 
| 
 | 
   289         it would be far better to be able to supply the original bed untouched
 | 
| 
 | 
   290         """
 | 
| 
 | 
   291         assert inbed <> None
 | 
| 
 | 
   292         bed = open(inbed,'r').readlines()
 | 
| 
 | 
   293         thead = os.path.join(self.opts.outdir,'tempSamHead.txt')
 | 
| 
 | 
   294         if self.opts.datatype == 'sam':
 | 
| 
 | 
   295             cl = ['samtools view -H -S',self.opts.input,'>',thead]
 | 
| 
 | 
   296         else:
 | 
| 
 | 
   297             cl = ['samtools view -H',self.opts.input,'>',thead]
 | 
| 
 | 
   298         self.runCL(cl=cl,output_dir=self.opts.outdir)
 | 
| 
 | 
   299         head = open(thead,'r').readlines()
 | 
| 
 | 
   300         s = '## got %d rows of header\n' % (len(head))
 | 
| 
 | 
   301         logging.info(s)
 | 
| 
 | 
   302         o = open(outf,'w')
 | 
| 
 | 
   303         o.write(''.join(head))
 | 
| 
 | 
   304         o.write(''.join(bed))
 | 
| 
 | 
   305         o.close()
 | 
| 
 | 
   306         return outf
 | 
| 
 | 
   307 
 | 
| 
 | 
   308     def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None):
 | 
| 
 | 
   309         """
 | 
| 
 | 
   310         interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs!
 | 
| 
 | 
   311         Do the work of removing all the error sequences
 | 
| 
 | 
   312         pysam is cool
 | 
| 
 | 
   313         infile = pysam.Samfile( "-", "r" )
 | 
| 
 | 
   314         outfile = pysam.Samfile( "-", "w", template = infile )
 | 
| 
 | 
   315         for s in infile: outfile.write(s)
 | 
| 
 | 
   316 
 | 
| 
 | 
   317         errors from ValidateSameFile.jar look like
 | 
| 
 | 
   318         WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
 | 
| 
 | 
   319         ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
 | 
| 
 | 
   320         ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041
 | 
| 
 | 
   321 
 | 
| 
 | 
   322         """
 | 
| 
 | 
   323         assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
 | 
| 
 | 
   324         assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path'
 | 
| 
 | 
   325         removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2]
 | 
| 
 | 
   326         remDict = dict(zip(removeNames,range(len(removeNames))))
 | 
| 
 | 
   327         infile = pysam.Samfile(insam,'rb')
 | 
| 
 | 
   328         info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict))
 | 
| 
 | 
   329         if len(removeNames) > 0:
 | 
| 
 | 
   330             outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file
 | 
| 
 | 
   331             i = 0
 | 
| 
 | 
   332             j = 0
 | 
| 
 | 
   333             for row in infile:
 | 
| 
 | 
   334                 dropme = remDict.get(row.qname,None) # keep if None
 | 
| 
 | 
   335                 if not dropme:
 | 
| 
 | 
   336                     outfile.write(row)
 | 
| 
 | 
   337                     j += 1
 | 
| 
 | 
   338                 else: # discard
 | 
| 
 | 
   339                     i += 1
 | 
| 
 | 
   340             info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam))
 | 
| 
 | 
   341             outfile.close()
 | 
| 
 | 
   342             infile.close()
 | 
| 
 | 
   343         else: # we really want a nullop or a simple pointer copy
 | 
| 
 | 
   344             infile.close()
 | 
| 
 | 
   345             if newsam:
 | 
| 
 | 
   346                 shutil.copy(insam,newsam)
 | 
| 
 | 
   347         logging.info(info)
 | 
| 
 | 
   348                 
 | 
| 
 | 
   349 
 | 
| 
 | 
   350 
 | 
| 
 | 
   351 def __main__():
 | 
| 
 | 
   352     doFix = False # tools returning htmlfile don't need this
 | 
| 
 | 
   353     doTranspose = True # default
 | 
| 
 | 
   354     maxloglines = 100 # default 
 | 
| 
 | 
   355     #Parse Command Line
 | 
| 
 | 
   356     op = optparse.OptionParser()
 | 
| 
 | 
   357     # All tools
 | 
| 
 | 
   358     op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
 | 
| 
 | 
   359     op.add_option('-e', '--inputext', default=None)
 | 
| 
 | 
   360     op.add_option('-o', '--output', default=None)
 | 
| 
 | 
   361     op.add_option('-n', '--title', default="Pick a Picard Tool")
 | 
| 
 | 
   362     op.add_option('-t', '--htmlout', default=None)
 | 
| 
 | 
   363     op.add_option('-d', '--outdir', default=None)
 | 
| 
 | 
   364     op.add_option('-x', '--maxjheap', default='4g')
 | 
| 
 | 
   365     op.add_option('-b', '--bisulphite', default='false')
 | 
| 
 | 
   366     op.add_option('-s', '--sortorder', default='query')     
 | 
| 
 | 
   367     op.add_option('','--tmpdir', default='/tmp')
 | 
| 
 | 
   368     op.add_option('-j','--jar',default='')    
 | 
| 
 | 
   369     op.add_option('','--picard-cmd',default=None)    
 | 
| 
 | 
   370     # Many tools
 | 
| 
 | 
   371     op.add_option( '', '--output-format', dest='output_format', help='Output format' )
 | 
| 
 | 
   372     op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
 | 
| 
 | 
   373     op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
 | 
| 
 | 
   374     # CreateSequenceDictionary
 | 
| 
 | 
   375     op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
 | 
| 
 | 
   376     op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
 | 
| 
 | 
   377     op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
 | 
| 
 | 
   378     op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
 | 
| 
 | 
   379     # MarkDuplicates
 | 
| 
 | 
   380     op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
 | 
| 
 | 
   381     op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
 | 
| 
 | 
   382     # CollectInsertSizeMetrics
 | 
| 
 | 
   383     op.add_option('', '--taillimit', default="0")
 | 
| 
 | 
   384     op.add_option('', '--histwidth', default="0")
 | 
| 
 | 
   385     op.add_option('', '--minpct', default="0.01")
 | 
| 
 | 
   386     # CollectAlignmentSummaryMetrics
 | 
| 
 | 
   387     op.add_option('', '--maxinsert', default="20")
 | 
| 
 | 
   388     op.add_option('', '--adaptors', action='append', type="string")
 | 
| 
 | 
   389     # FixMateInformation and validate
 | 
| 
 | 
   390     # CollectGcBiasMetrics
 | 
| 
 | 
   391     op.add_option('', '--windowsize', default='100')
 | 
| 
 | 
   392     op.add_option('', '--mingenomefrac', default='0.00001')    
 | 
| 
 | 
   393     # AddOrReplaceReadGroups
 | 
| 
 | 
   394     op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
 | 
| 
 | 
   395     op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
 | 
| 
 | 
   396     op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
 | 
| 
 | 
   397     op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
 | 
| 
 | 
   398     op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
 | 
| 
 | 
   399     op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
 | 
| 
 | 
   400     op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
 | 
| 
 | 
   401     op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
 | 
| 
 | 
   402     # ReorderSam
 | 
| 
 | 
   403     op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
 | 
| 
 | 
   404     op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
 | 
| 
 | 
   405     # ReplaceSamHeader
 | 
| 
 | 
   406     op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )
 | 
| 
 | 
   407 
 | 
| 
 | 
   408     op.add_option('','--assumesorted', default='true') 
 | 
| 
 | 
   409     op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
 | 
| 
 | 
   410     #estimatelibrarycomplexity
 | 
| 
 | 
   411     op.add_option('','--minid', default="5")
 | 
| 
 | 
   412     op.add_option('','--maxdiff', default="0.03")
 | 
| 
 | 
   413     op.add_option('','--minmeanq', default="20")
 | 
| 
 | 
   414     #hsmetrics
 | 
| 
 | 
   415     op.add_option('','--baitbed', default=None)
 | 
| 
 | 
   416     op.add_option('','--targetbed', default=None)
 | 
| 
 | 
   417     #validate
 | 
| 
 | 
   418     op.add_option('','--ignoreflags', action='append', type="string")
 | 
| 
 | 
   419     op.add_option('','--maxerrors', default=None)
 | 
| 
 | 
   420     op.add_option('','--datatype', default=None)
 | 
| 
 | 
   421     op.add_option('','--bamout', default=None)
 | 
| 
 | 
   422     op.add_option('','--samout', default=None)
 | 
| 
 | 
   423 
 | 
| 
 | 
   424     opts, args = op.parse_args()
 | 
| 
 | 
   425     opts.sortme = opts.assumesorted == 'false'
 | 
| 
 | 
   426     assert opts.input <> None
 | 
| 
 | 
   427     # need to add
 | 
| 
 | 
   428     # instance that does all the work
 | 
| 
 | 
   429     pic = PicardBase(opts,sys.argv[0])
 | 
| 
 | 
   430 
 | 
| 
 | 
   431     tmp_dir = opts.outdir
 | 
| 
 | 
   432     haveTempout = False # we use this where sam output is an option
 | 
| 
 | 
   433 
 | 
| 
 | 
   434     # set ref and dict files to use (create if necessary)
 | 
| 
 | 
   435     ref_file_name = opts.ref
 | 
| 
 | 
   436     if opts.ref_file <> None:
 | 
| 
 | 
   437         csd = 'CreateSequenceDictionary'
 | 
| 
 | 
   438         realjarpath = os.path.split(opts.jar)[0]
 | 
| 
 | 
   439         jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
 | 
| 
 | 
   440         tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
 | 
| 
 | 
   441         ref_file_name = '%s.fasta' % tmp_ref_name
 | 
| 
 | 
   442         # build dict
 | 
| 
 | 
   443         dict_file_name = '%s.dict' % tmp_ref_name
 | 
| 
 | 
   444         os.symlink( opts.ref_file, ref_file_name )
 | 
| 
 | 
   445         cl = ['REFERENCE=%s' % ref_file_name]
 | 
| 
 | 
   446         cl.append('OUTPUT=%s' % dict_file_name)
 | 
| 
 | 
   447         cl.append('URI=%s' % os.path.basename( opts.ref_file ))
 | 
| 
 | 
   448         cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
 | 
| 
 | 
   449         if opts.species_name:
 | 
| 
 | 
   450             cl.append('SPECIES=%s' % opts.species_name)
 | 
| 
 | 
   451         if opts.build_name:
 | 
| 
 | 
   452             cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
 | 
| 
 | 
   453         pic.delme.append(dict_file_name)
 | 
| 
 | 
   454         pic.delme.append(ref_file_name)
 | 
| 
 | 
   455         pic.delme.append(tmp_ref_name)
 | 
| 
 | 
   456         s = pic.runPic(jarpath, cl)
 | 
| 
 | 
   457         # run relevant command(s)
 | 
| 
 | 
   458 
 | 
| 
 | 
   459     # define temporary output
 | 
| 
 | 
   460     # if output is sam, it must have that extension, otherwise bam will be produced
 | 
| 
 | 
   461     # specify sam or bam file with extension
 | 
| 
 | 
   462     if opts.output_format == 'sam':
 | 
| 
 | 
   463         suff = '.sam'
 | 
| 
 | 
   464     else:
 | 
| 
 | 
   465         suff = ''
 | 
| 
 | 
   466     tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )
 | 
| 
 | 
   467 
 | 
| 
 | 
   468     cl = ['VALIDATION_STRINGENCY=LENIENT',]
 | 
| 
 | 
   469 
 | 
| 
 | 
   470     if pic.picname == 'AddOrReplaceReadGroups':
 | 
| 
 | 
   471         # sort order to match Galaxy's default
 | 
| 
 | 
   472         cl.append('SORT_ORDER=coordinate')
 | 
| 
 | 
   473         # input
 | 
| 
 | 
   474         cl.append('INPUT=%s' % opts.input)
 | 
| 
 | 
   475         # outputs
 | 
| 
 | 
   476         cl.append('OUTPUT=%s' % tempout)
 | 
| 
 | 
   477         # required read groups
 | 
| 
 | 
   478         cl.append('RGLB="%s"' % opts.rg_library)
 | 
| 
 | 
   479         cl.append('RGPL="%s"' % opts.rg_platform)
 | 
| 
 | 
   480         cl.append('RGPU="%s"' % opts.rg_plat_unit)
 | 
| 
 | 
   481         cl.append('RGSM="%s"' % opts.rg_sample)
 | 
| 
 | 
   482         if opts.rg_id:
 | 
| 
 | 
   483             cl.append('RGID="%s"' % opts.rg_id)
 | 
| 
 | 
   484         # optional read groups
 | 
| 
 | 
   485         if opts.rg_seq_center:
 | 
| 
 | 
   486             cl.append('RGCN="%s"' % opts.rg_seq_center)
 | 
| 
 | 
   487         if opts.rg_desc:
 | 
| 
 | 
   488             cl.append('RGDS="%s"' % opts.rg_desc)
 | 
| 
 | 
   489         pic.runPic(opts.jar, cl)
 | 
| 
 | 
   490         haveTempout = True
 | 
| 
 | 
   491 
 | 
| 
 | 
   492     elif pic.picname == 'BamIndexStats':
 | 
| 
 | 
   493         tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
 | 
| 
 | 
   494         tmp_bam_name = '%s.bam' % tmp_name
 | 
| 
 | 
   495         tmp_bai_name = '%s.bai' % tmp_bam_name
 | 
| 
 | 
   496         os.symlink( opts.input, tmp_bam_name )
 | 
| 
 | 
   497         os.symlink( opts.bai_file, tmp_bai_name )
 | 
| 
 | 
   498         cl.append('INPUT=%s' % ( tmp_bam_name ))
 | 
| 
 | 
   499         pic.delme.append(tmp_bam_name)
 | 
| 
 | 
   500         pic.delme.append(tmp_bai_name)
 | 
| 
 | 
   501         pic.delme.append(tmp_name)
 | 
| 
 | 
   502         s = pic.runPic( opts.jar, cl )
 | 
| 
 | 
   503         f = open(pic.metricsOut,'a')
 | 
| 
 | 
   504         f.write(s) # got this on stdout from runCl
 | 
| 
 | 
   505         f.write('\n')
 | 
| 
 | 
   506         f.close()
 | 
| 
 | 
   507         doTranspose = False # but not transposed
 | 
| 
 | 
   508 
 | 
| 
 | 
   509     elif pic.picname == 'EstimateLibraryComplexity':
 | 
| 
 | 
   510         cl.append('I=%s' % opts.input)
 | 
| 
 | 
   511         cl.append('O=%s' % pic.metricsOut)
 | 
| 
 | 
   512         if float(opts.minid) > 0:
 | 
| 
 | 
   513             cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
 | 
| 
 | 
   514         if float(opts.maxdiff) > 0.0:
 | 
| 
 | 
   515             cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
 | 
| 
 | 
   516         if float(opts.minmeanq) > 0:
 | 
| 
 | 
   517             cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
 | 
| 
 | 
   518         if opts.readregex > '':
 | 
| 
 | 
   519             cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
 | 
| 
 | 
   520         if float(opts.optdupdist) > 0:
 | 
| 
 | 
   521             cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
 | 
| 
 | 
   522         pic.runPic(opts.jar,cl)
 | 
| 
 | 
   523 
 | 
| 
 | 
   524     elif pic.picname == 'CollectAlignmentSummaryMetrics':
 | 
| 
 | 
   525         # Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
 | 
| 
 | 
   526         # why? Dunno Seems to work without complaining if the .bai file is AWOL....
 | 
| 
 | 
   527         fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
 | 
| 
 | 
   528         try:
 | 
| 
 | 
   529             os.symlink(ref_file_name,fakefasta)
 | 
| 
 | 
   530         except:
 | 
| 
 | 
   531             s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
 | 
| 
 | 
   532             info = s
 | 
| 
 | 
   533             shutil.copy(ref_file_name,fakefasta)
 | 
| 
 | 
   534         pic.delme.append(fakefasta)
 | 
| 
 | 
   535         cl.append('ASSUME_SORTED=%s' % opts.assumesorted)
 | 
| 
 | 
   536         adaptorseqs = ''.join([' ADAPTER_SEQUENCE=%s' % x for x in opts.adaptors])
 | 
| 
 | 
   537         cl.append(adaptorseqs)
 | 
| 
 | 
   538         cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
 | 
| 
 | 
   539         cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
 | 
| 
 | 
   540         cl.append('OUTPUT=%s' % pic.metricsOut)
 | 
| 
 | 
   541         cl.append('R=%s' % fakefasta)
 | 
| 
 | 
   542         cl.append('TMP_DIR=%s' % opts.tmpdir)
 | 
| 
 | 
   543         if not opts.assumesorted.lower() == 'true': # we need to sort input
 | 
| 
 | 
   544             fakeinput = '%s.sorted' % opts.input
 | 
| 
 | 
   545             s = pic.sortSam(opts.input, fakeinput, opts.outdir)
 | 
| 
 | 
   546             pic.delme.append(fakeinput)
 | 
| 
 | 
   547             cl.append('INPUT=%s' % fakeinput)
 | 
| 
 | 
   548         else:
 | 
| 
 | 
   549             cl.append('INPUT=%s' % os.path.abspath(opts.input)) 
 | 
| 
 | 
   550         pic.runPic(opts.jar,cl)
 | 
| 
 | 
   551        
 | 
| 
 | 
   552         
 | 
| 
 | 
   553     elif pic.picname == 'CollectGcBiasMetrics':
 | 
| 
 | 
   554         assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
 | 
| 
 | 
   555         # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
 | 
| 
 | 
   556         # why? Dunno 
 | 
| 
 | 
   557         fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
 | 
| 
 | 
   558         try:
 | 
| 
 | 
   559             os.symlink(ref_file_name,fakefasta)
 | 
| 
 | 
   560         except:
 | 
| 
 | 
   561             s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
 | 
| 
 | 
   562             info = s
 | 
| 
 | 
   563             shutil.copy(ref_file_name,fakefasta)
 | 
| 
 | 
   564         pic.delme.append(fakefasta)
 | 
| 
 | 
   565         x = 'rgPicardGCBiasMetrics'
 | 
| 
 | 
   566         pdfname = '%s.pdf' % x
 | 
| 
 | 
   567         jpgname = '%s.jpg' % x
 | 
| 
 | 
   568         tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
 | 
| 
 | 
   569         temppdf = os.path.join(opts.outdir,pdfname)
 | 
| 
 | 
   570         cl.append('R=%s' % fakefasta)
 | 
| 
 | 
   571         cl.append('WINDOW_SIZE=%s' % opts.windowsize)
 | 
| 
 | 
   572         cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
 | 
| 
 | 
   573         cl.append('INPUT=%s' % opts.input)
 | 
| 
 | 
   574         cl.append('OUTPUT=%s' % tempout)
 | 
| 
 | 
   575         cl.append('TMP_DIR=%s' % opts.tmpdir)
 | 
| 
 | 
   576         cl.append('CHART_OUTPUT=%s' % temppdf)
 | 
| 
 | 
   577         cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
 | 
| 
 | 
   578         pic.runPic(opts.jar,cl)
 | 
| 
 | 
   579         if os.path.isfile(temppdf):
 | 
| 
 | 
   580             cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
 | 
| 
 | 
   581             s,stdouts = pic.runCL(cl=cl2,output_dir=opts.outdir)
 | 
| 
 | 
   582         else:
 | 
| 
 | 
   583             s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
 | 
| 
 | 
   584         lf = open(pic.log_filename,'a')
 | 
| 
 | 
   585         lf.write(s)
 | 
| 
 | 
   586         lf.write('\n')
 | 
| 
 | 
   587         lf.close()
 | 
| 
 | 
   588         
 | 
| 
 | 
   589     elif pic.picname == 'CollectInsertSizeMetrics':
 | 
| 
 | 
   590         isPDF = 'InsertSizeHist.pdf'
 | 
| 
 | 
   591         pdfpath = os.path.join(opts.outdir,isPDF)
 | 
| 
 | 
   592         histpdf = 'InsertSizeHist.pdf'
 | 
| 
 | 
   593         cl.append('I=%s' % opts.input)
 | 
| 
 | 
   594         cl.append('O=%s' % pic.metricsOut)
 | 
| 
 | 
   595         cl.append('HISTOGRAM_FILE=%s' % histpdf)
 | 
| 
 | 
   596         if opts.taillimit <> '0':
 | 
| 
 | 
   597             cl.append('TAIL_LIMIT=%s' % opts.taillimit)
 | 
| 
 | 
   598         if  opts.histwidth <> '0':
 | 
| 
 | 
   599             cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
 | 
| 
 | 
   600         if float( opts.minpct) > 0.0:
 | 
| 
 | 
   601             cl.append('MINIMUM_PCT=%s' % opts.minpct)
 | 
| 
 | 
   602         pic.runPic(opts.jar,cl)   
 | 
| 
 | 
   603         if os.path.exists(pdfpath): # automake thumbnail - will be added to html 
 | 
| 
 | 
   604             cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
 | 
| 
 | 
   605             s,stdouts = pic.runCL(cl=cl2,output_dir=opts.outdir)
 | 
| 
 | 
   606         else:
 | 
| 
 | 
   607             s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
 | 
| 
 | 
   608             s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
 | 
| 
 | 
   609             s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
 | 
| 
 | 
   610             s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
 | 
| 
 | 
   611             stdouts = ''
 | 
| 
 | 
   612         logging.info(s)
 | 
| 
 | 
   613         if len(stdouts) > 0:
 | 
| 
 | 
   614            logging.info(stdouts)
 | 
| 
 | 
   615         
 | 
| 
 | 
   616     elif pic.picname == 'MarkDuplicates':
 | 
| 
 | 
   617         # assume sorted even if header says otherwise
 | 
| 
 | 
   618         cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
 | 
| 
 | 
   619         # input
 | 
| 
 | 
   620         cl.append('INPUT=%s' % opts.input)
 | 
| 
 | 
   621         # outputs
 | 
| 
 | 
   622         cl.append('OUTPUT=%s' % opts.output) 
 | 
| 
 | 
   623         cl.append('METRICS_FILE=%s' % pic.metricsOut )
 | 
| 
 | 
   624         # remove or mark duplicates
 | 
| 
 | 
   625         cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
 | 
| 
 | 
   626         # the regular expression to be used to parse reads in incoming SAM file
 | 
| 
 | 
   627         cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
 | 
| 
 | 
   628         # maximum offset between two duplicate clusters
 | 
| 
 | 
   629         cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
 | 
| 
 | 
   630         pic.runPic(opts.jar, cl)
 | 
| 
 | 
   631 
 | 
| 
 | 
   632     elif pic.picname == 'FixMateInformation':
 | 
| 
 | 
   633         cl.append('I=%s' % opts.input)
 | 
| 
 | 
   634         cl.append('O=%s' % tempout)
 | 
| 
 | 
   635         cl.append('SORT_ORDER=%s' % opts.sortorder)
 | 
| 
 | 
   636         pic.runPic(opts.jar,cl)
 | 
| 
 | 
   637         haveTempout = True
 | 
| 
 | 
   638         
 | 
| 
 | 
   639     elif pic.picname == 'ReorderSam':
 | 
| 
 | 
   640         # input
 | 
| 
 | 
   641         cl.append('INPUT=%s' % opts.input)
 | 
| 
 | 
   642         # output
 | 
| 
 | 
   643         cl.append('OUTPUT=%s' % tempout)
 | 
| 
 | 
   644         # reference
 | 
| 
 | 
   645         cl.append('REFERENCE=%s' % ref_file_name)
 | 
| 
 | 
   646         # incomplete dict concordance
 | 
| 
 | 
   647         if opts.allow_inc_dict_concord == 'true':
 | 
| 
 | 
   648             cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
 | 
| 
 | 
   649         # contig length discordance
 | 
| 
 | 
   650         if opts.allow_contig_len_discord == 'true':
 | 
| 
 | 
   651             cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
 | 
| 
 | 
   652         pic.runPic(opts.jar, cl)
 | 
| 
 | 
   653         haveTempout = True
 | 
| 
 | 
   654 
 | 
| 
 | 
   655     elif pic.picname == 'ReplaceSamHeader':
 | 
| 
 | 
   656         cl.append('INPUT=%s' % opts.input)
 | 
| 
 | 
   657         cl.append('OUTPUT=%s' % tempout)
 | 
| 
 | 
   658         cl.append('HEADER=%s' % opts.header_file)
 | 
| 
 | 
   659         pic.runPic(opts.jar, cl)
 | 
| 
 | 
   660         haveTempout = True
 | 
| 
 | 
   661 
 | 
| 
 | 
   662     elif pic.picname == 'CalculateHsMetrics':
 | 
| 
 | 
   663         maxloglines = 100
 | 
| 
 | 
   664         baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
 | 
| 
 | 
   665         targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
 | 
| 
 | 
   666         baitf = pic.makePicInterval(opts.baitbed,baitfname)
 | 
| 
 | 
   667         if opts.targetbed == opts.baitbed: # same file sometimes
 | 
| 
 | 
   668             targetf = baitf
 | 
| 
 | 
   669         else:
 | 
| 
 | 
   670             targetf = pic.makePicInterval(opts.targetbed,targetfname)   
 | 
| 
 | 
   671         cl.append('BAIT_INTERVALS=%s' % baitf)
 | 
| 
 | 
   672         cl.append('TARGET_INTERVALS=%s' % targetf)
 | 
| 
 | 
   673         cl.append('INPUT=%s' % os.path.abspath(opts.input))
 | 
| 
 | 
   674         cl.append('OUTPUT=%s' % pic.metricsOut)
 | 
| 
 | 
   675         cl.append('TMP_DIR=%s' % opts.tmpdir)
 | 
| 
 | 
   676         pic.runPic(opts.jar,cl)
 | 
| 
 | 
   677            
 | 
| 
 | 
   678     elif pic.picname == 'ValidateSamFile':
 | 
| 
 | 
   679         import pysam
 | 
| 
 | 
   680         doTranspose = False
 | 
| 
 | 
   681         sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
 | 
| 
 | 
   682         stf = open(pic.log_filename,'w')
 | 
| 
 | 
   683         tlog = None
 | 
| 
 | 
   684         if opts.datatype == 'sam': # need to work with a bam 
 | 
| 
 | 
   685             tlog,tempbam = pic.samToBam(opts.input,opts.outdir)
 | 
| 
 | 
   686             try:
 | 
| 
 | 
   687                 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
 | 
| 
 | 
   688             except:
 | 
| 
 | 
   689                 print '## exception on sorting sam file %s' % opts.input
 | 
| 
 | 
   690         else: # is already bam
 | 
| 
 | 
   691             try:
 | 
| 
 | 
   692                 tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
 | 
| 
 | 
   693             except: # bug - [bam_sort_core] not being ignored - TODO fixme
 | 
| 
 | 
   694                 print '## exception on sorting bam file %s' % opts.input
 | 
| 
 | 
   695         if tlog:
 | 
| 
 | 
   696             print '##tlog=',tlog
 | 
| 
 | 
   697             stf.write(tlog)
 | 
| 
 | 
   698             stf.write('\n')
 | 
| 
 | 
   699         sortedfile = '%s.bam' % sortedfile # samtools does that      
 | 
| 
 | 
   700         cl.append('O=%s' % pic.metricsOut)
 | 
| 
 | 
   701         cl.append('TMP_DIR=%s' % opts.tmpdir)
 | 
| 
 | 
   702         cl.append('I=%s' % sortedfile)
 | 
| 
 | 
   703         opts.maxerrors = '99999999'
 | 
| 
 | 
   704         cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
 | 
| 
 | 
   705         if opts.ignoreflags[0] <> 'None': # picard error values to ignore
 | 
| 
 | 
   706             igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None']
 | 
| 
 | 
   707             cl.append(' '.join(igs))
 | 
| 
 | 
   708         if opts.bisulphite.lower() <> 'false':
 | 
| 
 | 
   709             cl.append('IS_BISULFITE_SEQUENCED=true')
 | 
| 
 | 
   710         if opts.ref <> None or opts.ref_file <> None:
 | 
| 
 | 
   711             cl.append('R=%s' %  ref_file_name)
 | 
| 
 | 
   712         pic.runPic(opts.jar,cl)
 | 
| 
 | 
   713         if opts.datatype == 'sam':
 | 
| 
 | 
   714             pic.delme.append(tempbam)
 | 
| 
 | 
   715         newsam = opts.output
 | 
| 
 | 
   716         outformat = 'bam'
 | 
| 
 | 
   717         pe = open(pic.metricsOut,'r').readlines()
 | 
| 
 | 
   718         pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
 | 
| 
 | 
   719         pic.delme.append(sortedfile) # not wanted
 | 
| 
 | 
   720         stf.close()
 | 
| 
 | 
   721         pic.cleanup()
 | 
| 
 | 
   722     else:
 | 
| 
 | 
   723         print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname
 | 
| 
 | 
   724         sys.exit(1)
 | 
| 
 | 
   725     if haveTempout:
 | 
| 
 | 
   726         # Some Picard tools produced a potentially intermediate bam file. 
 | 
| 
 | 
   727         # Either just move to final location or create sam
 | 
| 
 | 
   728         shutil.move(tempout, os.path.abspath(opts.output))
 | 
| 
 | 
   729 
 | 
| 
 | 
   730     if opts.htmlout <> None or doFix: # return a pretty html page
 | 
| 
 | 
   731         pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
 | 
| 
 | 
   732 
 | 
| 
 | 
   733 if __name__=="__main__": __main__()
 | 
| 
 | 
   734 
 |