diff tools/rgenetics/rgFastQC.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/rgenetics/rgFastQC.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,149 @@
+"""
+wrapper for fastqc
+
+called as
+  <command interpreter="python">
+    rgFastQC.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
+  </command>
+
+
+
+Current release seems overly intolerant of sam/bam header strangeness
+Author notified...
+
+
+"""
+
+import os,sys,subprocess,optparse,shutil,tempfile
+from rgutils import getFileString
+
+class FastQC():
+    """Run the fastqc executable and repackage its report for a Galaxy html output.
+
+    opts is the optparse result built in the __main__ section; the attributes read
+    here are executable, input, informat, contaminants and outputdir.
+    """
+
+    def __init__(self,opts=None):
+        assert opts is not None
+        self.opts = opts
+
+    def run_fastqc(self):
+        """Run fastqc on opts.input and return the html report as a list of lines.
+
+        In batch mode fastqc writes to a new folder called [infilebasename]_fastqc holding
+        fastqc_report.html, fastqc_data.txt, summary.txt and the png plots and icons.
+        If no readable report turns up under outputdir, the lines returned are an error
+        page that embeds everything fastqc wrote to stdout/stderr.
+        """
+        fd,tlog = tempfile.mkstemp(prefix='rgFastQClog')
+        sout = os.fdopen(fd,'w') # reuse the descriptor mkstemp opened so it is not leaked
+        # an argument list with no shell keeps paths with spaces or metacharacters intact
+        cl = [self.opts.executable,'-o',self.opts.outputdir]
+        if self.opts.informat in ['sam','bam']:
+            cl += ['-f',self.opts.informat]
+        if self.opts.contaminants is not None:
+            cl += ['-c',self.opts.contaminants]
+        cl.append(self.opts.input)
+        try:
+            p = subprocess.Popen(cl, stderr=sout, stdout=sout, cwd=self.opts.outputdir)
+            p.wait()
+        except OSError: # could not be started - put the reason in the log shown to the user
+            sout.write('## rgFastQC could not run %s: %s\n' % (self.opts.executable,sys.exc_info()[1]))
+        sout.close()
+        logf = open(tlog,'r')
+        try:
+            runlog = logf.readlines()
+        finally:
+            logf.close()
+        os.unlink(tlog)
+        odpath = None # fastqc plays games with its output directory name. eesh
+        for f in os.listdir(self.opts.outputdir):
+            d = os.path.join(self.opts.outputdir,f)
+            if os.path.isdir(d) and d.endswith('_fastqc'):
+                odpath = d
+        hpath = None
+        rep = None # stays None unless the report is actually read
+        if odpath is not None:
+            hpath = os.path.join(odpath,'fastqc_report.html')
+            try:
+                repf = open(hpath,'r')
+                try:
+                    rep = repf.readlines() # for our new html file
+                finally:
+                    repf.close()
+            except IOError:
+                pass # unreadable report is treated below exactly like a missing one
+        if rep is None:
+            res =  ['## odpath=%s: No output found in %s. Output for the run was:<pre>\n' % (odpath,hpath),]
+            res += runlog
+            res += ['</pre>\n',
+                   'Please read the above for clues<br/>\n',
+                   'If you selected a sam/bam format file, it might not have headers or they may not start with @HD?<br/>\n',
+                   'It is also possible that the log shows that fastqc is not installed?<br/>\n',
+                   'If that is the case, please tell the relevant Galaxy administrator that it can be snarfed from<br/>\n',
+                   'http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/<br/>\n',]
+            return res
+        self.fix_fastqcimages(odpath)
+        excludefiles = ['tick.png','warning.png','fastqc_icon.png','error.png']
+        # list outputdir again: the report files have now been moved up into it
+        flist = [x for x in os.listdir(self.opts.outputdir) if x not in excludefiles]
+        # need to fix links to Icons and Images subdirectories in latest fastqc code - ugh
+        rep = [x.replace('Icons/','').replace('Images/','') for x in rep]
+        return self.fix_fastqc(rep,flist,runlog)
+
+    def fix_fastqc(self,rep=None,flist=None,runlog=None):
+        """Return a copy of rep with a table of links to the files named in flist inserted
+        before the closing body line; runlog is accepted but not used.
+        """
+        rep = rep or []
+        bs = '</body></html>\n' # hope they don't change this
+        try:
+            bodyindex = rep.index(bs)
+        except ValueError: # closing line not found - insert before the last line instead
+            bodyindex = max(len(rep) - 1, 0)
+        res = ['<table>\n']
+        for f in sorted(flist or []): # sorted copy so the caller's list is not reordered
+            # flist holds names relative to outputdir, so look for directories there
+            if not os.path.isdir(os.path.join(self.opts.outputdir,f)):
+                fn = os.path.split(f)[-1]
+                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,getFileString(fn, self.opts.outputdir)))
+        res.append('</table><p/>\n')
+        res.append('<a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC documentation and full attribution is here</a><br/><hr/>\n')
+        res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n')
+        return rep[:bodyindex] + res + rep[bodyindex:] # with our additions
+
+    def fix_fastqcimages(self,odpath):
+        """Galaxy wants everything in the same files_dir: move the files in odpath and in its
+        Icons and Images subdirectories up into outputdir, then remove the emptied folders.
+        """
+        for adir in [os.path.join(odpath,'Icons'),os.path.join(odpath,'Images'),odpath]:
+            if os.path.exists(adir):
+                for f in os.listdir(adir): # get all files created
+                    sauce = os.path.join(adir,f)
+                    if not os.path.isdir(sauce):
+                        shutil.move(sauce,os.path.join(self.opts.outputdir,f))
+                os.rmdir(adir)
+
+    
+
+if __name__ == '__main__':
+    op = optparse.OptionParser()
+    op.add_option('-i', '--input', default=None)
+    op.add_option('-o', '--htmloutput', default=None)
+    op.add_option('-d', '--outputdir', default="/tmp/shortread")
+    op.add_option('-f', '--informat', default='fastq')
+    op.add_option('-n', '--namejob', default='rgFastQC')
+    op.add_option('-c', '--contaminants', default=None)
+    op.add_option('-e', '--executable', default='fastqc')
+    opts, args = op.parse_args()
+    if opts.input is None or opts.htmloutput is None: # fail before fastqc runs, even under python -O
+        op.error('##rgFastQC.py error - both -i/--input and -o/--htmloutput are required')
+    assert os.path.isfile(opts.executable),'##rgFastQC.py error - cannot find executable %s' % opts.executable
+    if not os.path.exists(opts.outputdir): 
+        os.makedirs(opts.outputdir)
+    html = FastQC(opts).run_fastqc() # list of html lines: the report or an error page
+    outf = open(opts.htmloutput, 'w')
+    outf.write(''.join(html))
+    outf.close()
+