"""
wrapper for fastqc
called as
rgFastqc.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
The current FastQC release seems overly intolerant of unusual sam/bam headers.
The FastQC author has been notified.
"""
import os,sys,subprocess,optparse,shutil,tempfile
from rgutils import getFileString
class FastQC():
"""wrapper
"""
def __init__(self,opts=None):
assert opts <> None
self.opts = opts
def run_fastqc(self):
    """Run fastqc on self.opts.input and return the lines of the html report.

    In batch mode fastqc does not behave very nicely: it writes its results
    to a new folder called [infilebasename]_fastqc, e.g.
    FC041_1_sequence_fastqc, holding fastqc_report.html, fastqc_data.txt,
    summary.txt and a set of png images and icons.

    Reads from self.opts: executable, outputdir, informat, contaminants and
    input. self.opts.outputdir is used as the working directory of the child
    process and must already exist.

    Returns the value of self.fix_fastqc(report_lines, file_list, run_log)
    when a report was produced. Otherwise returns a list of strings that
    holds an explanation followed by everything fastqc wrote to
    stdout/stderr.
    """
    outputdir = self.opts.outputdir

    # Build an argument vector rather than a shell string so that paths
    # containing spaces or shell metacharacters reach fastqc unchanged.
    # NOTE(review): this assumes opts.executable is the path of the program
    # only, without embedded arguments - confirm against the caller.
    argv = [self.opts.executable, '-o', outputdir]
    if self.opts.informat in ['sam', 'bam']:
        argv += ['-f', self.opts.informat]
    if self.opts.contaminants is not None:
        argv += ['-c', self.opts.contaminants]
    argv.append(self.opts.input)

    # Capture stdout and stderr of the run in a temporary log file. The
    # descriptor returned by mkstemp is wrapped (and so closed) instead of
    # being leaked, and the log is removed whatever happens.
    log_fd, log_path = tempfile.mkstemp(prefix='rgFastQClog')
    try:
        with os.fdopen(log_fd, 'w') as sout:
            try:
                subprocess.Popen(argv, shell=False, stderr=sout, stdout=sout,
                                 cwd=outputdir).wait()
            except OSError as err:
                # Without a shell a missing executable raises here; put the
                # reason in the log so it still reaches the user.
                sout.write('## Unable to run %s: %s\n'
                           % (self.opts.executable, err))
        with open(log_path, 'r') as log:
            runlog = log.readlines()
    finally:
        os.unlink(log_path)

    # fastqc plays games with its output directory name, so look for any
    # sub directory ending in _fastqc (the last one listed wins).
    odpath = None
    for name in os.listdir(outputdir):
        candidate = os.path.join(outputdir, name)
        if os.path.isdir(candidate) and candidate.endswith('_fastqc'):
            odpath = candidate

    # Lines of fastqc's own report; fix_fastqc adds our content to them.
    # rep stays None unless the report was really read, so a missing or
    # unreadable report gives the explanation below and not a NameError.
    rep = None
    if odpath is not None:
        hpath = os.path.join(odpath, 'fastqc_report.html')
        try:
            with open(hpath, 'r') as report:
                rep = report.readlines()
        except (IOError, OSError):
            rep = None

    if rep is None:
        # NOTE(review): the markup inside the literals below was lost in the
        # copy of this file under review; '<br/>' is a reconstruction -
        # confirm against the upstream source.
        res = ['## odpath=%s: No output found in %s. Output for the run was:\n'
               % (odpath, outputdir)]
        res += runlog
        res += ['<br/>\n',
                'Please read the above for clues<br/>\n',
                'If you selected a sam/bam format file, it might not have headers or they may not start with @HD?<br/>\n',
                'It is also possible that the log shows that fastqc is not installed?<br/>\n',
                'If that is the case, please tell the relevant Galaxy administrator that it can be snarfed from<br/>\n',
                'http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/<br/>\n']
        return res

    self.fix_fastqcimages(odpath)
    # List the directory again: the call above has now fixed the files.
    excludefiles = ['tick.png', 'warning.png', 'fastqc_icon.png', 'error.png']
    flist = [x for x in os.listdir(outputdir) if x not in excludefiles]
    # The latest fastqc code links to Icons and Images sub directories;
    # strip those prefixes from every line of the report.
    rep = [line.replace('Icons/', '').replace('Images/', '') for line in rep]
    return self.fix_fastqc(rep, flist, runlog)
def fix_fastqc(self,rep=[],flist=[],runlog=[]):
""" add some of our stuff to the html
"""
bs = '