Mercurial > repos > devteam > fastqc
diff rgFastQC.py @ 10:a00a6402d09a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit 2bfbb5ae6b801e43355fdc3f964a5111fe3fe3a1
author | iuc |
---|---|
date | Wed, 08 Feb 2017 12:43:43 -0500 |
parents | 3a458e268066 |
children | db2dc6bc8f05 |
line wrap: on
line diff
```diff
--- a/rgFastQC.py	Wed Nov 02 16:12:51 2016 -0400
+++ b/rgFastQC.py	Wed Feb 08 12:43:43 2017 -0500
@@ -15,30 +15,28 @@
 
     rgFastQC.py -i path/dataset_1.dat -j 1000gsample.fastq -o path/dataset_3.dat -d path/job_working_directory/subfolder
         -f fastq -n FastQC -c path/dataset_2.dat -e fastqc
-
 """
-
+import bz2
+import glob
+import gzip
+import mimetypes
+import optparse
+import os
 import re
-import os
 import shutil
 import subprocess
-import optparse
 import tempfile
-import glob
-import gzip
-import bz2
 import zipfile
-import mimetypes
 
+
 class FastQCRunner(object):
-
-    def __init__(self,opts=None):
+    def __init__(self, opts=None):
         '''
         Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc()
         '''
 
         # Check whether the options are specified and saves them into the object
-        assert opts != None
+        assert opts is not None
         self.opts = opts
 
     def prepare_command_line(self):
@@ -62,7 +60,7 @@
                 trimext = True
             f.close()
         elif linf.endswith('bz2'):
-            f = bz2.open(self.opts.input,'rb')
+            f = bz2.BZ2File(self.opts.input, 'r')
             try:
                 f.readline()
             except:
@@ -72,35 +70,35 @@
             if not zipfile.is_zipfile(self.opts.input):
                 trimext = True
         if trimext:
-	    f = open(self.opts.input)
-	    try:
-	        f.readline()
-	    except:
-	        raise Exception("Input file corruption, could not identify the filetype")
-	    infname = os.path.splitext(infname)[0]
+            f = open(self.opts.input)
+            try:
+                f.readline()
+            except:
+                raise Exception("Input file corruption, could not identify the filetype")
+            infname = os.path.splitext(infname)[0]
 
         # Replace unwanted or problematic charaters in the input file name
-        self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
+        self.fastqinfilename = re.sub(r'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
         # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise
-        if 'fastq' in opts.informat:
+        if 'fastq' in self.opts.informat:
             # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's
             # accepted formats..
-            opts.informat = 'fastq'
-        elif not self.fastqinfilename.endswith(opts.informat):
-            self.fastqinfilename += '.%s' % opts.informat
+            self.opts.informat = 'fastq'
+        elif not self.fastqinfilename.endswith(self.opts.informat):
+            self.fastqinfilename += '.%s' % self.opts.informat
 
         # Build the Commandline from the given parameters
-        command_line = [opts.executable, '--outdir %s' % opts.outputdir]
-        if opts.contaminants != None:
-            command_line.append('--contaminants %s' % opts.contaminants)
-        if opts.limits != None:
-            command_line.append('--limits %s' % opts.limits)
+        command_line = [opts.executable, '--outdir %s' % self.opts.outputdir]
+        if self.opts.contaminants is not None:
+            command_line.append('--contaminants %s' % self.opts.contaminants)
+        if self.opts.limits is not None:
+            command_line.append('--limits %s' % self.opts.limits)
         command_line.append('--quiet')
-        command_line.append('--extract') # to access the output text file
-	if type[-1] != "gzip":
-	    command_line.append('-f %s' % opts.informat)
-	else:
-	    self.fastqinfilename += ".gz"
+        command_line.append('--extract')  # to access the output text file
+        if type[-1] != "gzip":
+            command_line.append('-f %s' % self.opts.informat)
+        else:
+            self.fastqinfilename += ".gz"
         command_line.append(self.fastqinfilename)
         self.command_line = ' '.join(command_line)
 
@@ -110,30 +108,30 @@
         '''
 
         # retrieve html file
-        result_file = glob.glob(opts.outputdir + '/*html')
+        result_file = glob.glob(self.opts.outputdir + '/*html')
         with open(result_file[0], 'rb') as fsrc:
             with open(self.opts.htmloutput, 'wb') as fdest:
                 shutil.copyfileobj(fsrc, fdest)
 
         # retrieve text file
-        text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt')
+        text_file = glob.glob(self.opts.outputdir + '/*/fastqc_data.txt')
         with open(text_file[0], 'rb') as fsrc:
             with open(self.opts.textoutput, 'wb') as fdest:
                 shutil.copyfileobj(fsrc, fdest)
 
     def run_fastqc(self):
         '''
-        Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts)
+        Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options
         '''
 
         # Create a log file
-        dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir)
+        dummy, tlog = tempfile.mkstemp(prefix='rgFastQC', suffix=".log", dir=self.opts.outputdir)
         sout = open(tlog, 'w')
 
         self.prepare_command_line()
         sout.write(self.command_line)
         sout.write('\n')
-        sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name
+        sout.write("Creating symlink\n")  # between the input (.dat) file and the given input file name
         os.symlink(self.opts.input, self.fastqinfilename)
         sout.write("check_call\n")
         subprocess.check_call(self.command_line, shell=True)
@@ -142,6 +140,7 @@
         sout.write("Finished")
         sout.close()
 
+
 if __name__ == '__main__':
     op = optparse.OptionParser()
     op.add_option('-i', '--input', default=None)
@@ -156,9 +155,9 @@
     op.add_option('-e', '--executable', default='fastqc')
     opts, args = op.parse_args()
 
-    assert opts.input != None
-    assert opts.inputfilename != None
-    assert opts.htmloutput != None
+    assert opts.input is not None
+    assert opts.inputfilename is not None
+    assert opts.htmloutput is not None
     if not os.path.exists(opts.outputdir):
         os.makedirs(opts.outputdir)
 
```