diff rgFastQC.py @ 10:a00a6402d09a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit 2bfbb5ae6b801e43355fdc3f964a5111fe3fe3a1
author iuc
date Wed, 08 Feb 2017 12:43:43 -0500
parents 3a458e268066
children db2dc6bc8f05
line wrap: on
line diff
--- a/rgFastQC.py	Wed Nov 02 16:12:51 2016 -0400
+++ b/rgFastQC.py	Wed Feb 08 12:43:43 2017 -0500
@@ -15,30 +15,28 @@
 
     rgFastQC.py -i path/dataset_1.dat -j 1000gsample.fastq -o path/dataset_3.dat -d path/job_working_directory/subfolder
         -f fastq -n FastQC -c path/dataset_2.dat -e fastqc
-
 """
-
+import bz2
+import glob
+import gzip
+import mimetypes
+import optparse
+import os
 import re
-import os
 import shutil
 import subprocess
-import optparse
 import tempfile
-import glob
-import gzip
-import bz2
 import zipfile
-import mimetypes
+
 
 class FastQCRunner(object):
-
-    def __init__(self,opts=None):
+    def __init__(self, opts=None):
         '''
         Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc()
         '''
 
         # Check whether the options are specified and saves them into the object
-        assert opts != None
+        assert opts is not None
         self.opts = opts
 
     def prepare_command_line(self):
@@ -62,7 +60,7 @@
                 trimext = True
             f.close()
         elif linf.endswith('bz2'):
-            f = bz2.open(self.opts.input,'rb')
+            f = bz2.BZ2File(self.opts.input, 'r')
             try:
                 f.readline()
             except:
@@ -72,35 +70,35 @@
             if not zipfile.is_zipfile(self.opts.input):
                 trimext = True
         if trimext:
-	   f = open(self.opts.input)
-	   try:
-	       f.readline()
-	   except:
-	       raise Exception("Input file corruption, could not identify the filetype")
-           infname = os.path.splitext(infname)[0]
+            with open(self.opts.input) as f:  # context manager: the probe handle is always closed
+                try:
+                    f.readline()
+                except Exception:
+                    raise Exception("Input file corruption, could not identify the filetype")
+            infname = os.path.splitext(infname)[0]
 
         # Replace unwanted or problematic charaters in the input file name
-        self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
+        self.fastqinfilename = re.sub(r'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
         # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise
-        if 'fastq' in opts.informat:
+        if 'fastq' in self.opts.informat:
             # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's
             # accepted formats..
-            opts.informat = 'fastq'
-        elif not self.fastqinfilename.endswith(opts.informat):
-            self.fastqinfilename += '.%s' % opts.informat
+            self.opts.informat = 'fastq'
+        elif not self.fastqinfilename.endswith(self.opts.informat):
+            self.fastqinfilename += '.%s' % self.opts.informat
 
         # Build the Commandline from the given parameters
-        command_line = [opts.executable, '--outdir %s' % opts.outputdir]
-        if opts.contaminants != None:
-            command_line.append('--contaminants %s' % opts.contaminants)
-        if opts.limits != None:
-	    command_line.append('--limits %s' % opts.limits)
+        command_line = [self.opts.executable, '--outdir %s' % self.opts.outputdir]
+        if self.opts.contaminants is not None:
+            command_line.append('--contaminants %s' % self.opts.contaminants)
+        if self.opts.limits is not None:
+            command_line.append('--limits %s' % self.opts.limits)
         command_line.append('--quiet')
-        command_line.append('--extract') # to access the output text file
-	if type[-1] != "gzip":
-            command_line.append('-f %s' % opts.informat)
-	else:
-	    self.fastqinfilename += ".gz"
+        command_line.append('--extract')  # to access the output text file
+        if type[-1] != "gzip":
+            command_line.append('-f %s' % self.opts.informat)
+        else:
+            self.fastqinfilename += ".gz"
         command_line.append(self.fastqinfilename)
         self.command_line = ' '.join(command_line)
 
@@ -110,30 +108,30 @@
         '''
 
         # retrieve html file
-        result_file = glob.glob(opts.outputdir + '/*html')
+        result_file = glob.glob(self.opts.outputdir + '/*html')
         with open(result_file[0], 'rb') as fsrc:
             with open(self.opts.htmloutput, 'wb') as fdest:
                 shutil.copyfileobj(fsrc, fdest)
 
         # retrieve text file
-        text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt')
+        text_file = glob.glob(self.opts.outputdir + '/*/fastqc_data.txt')
         with open(text_file[0], 'rb') as fsrc:
             with open(self.opts.textoutput, 'wb') as fdest:
                 shutil.copyfileobj(fsrc, fdest)
 
     def run_fastqc(self):
         '''
-        Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts)
+        Executes FastQC. Make sure the mandatory parameters input, inputfilename, outputdir and htmloutput have been specified in the options
         '''
 
         # Create a log file
-        dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir)
+        dummy, tlog = tempfile.mkstemp(prefix='rgFastQC', suffix=".log", dir=self.opts.outputdir)
         sout = open(tlog, 'w')
 
         self.prepare_command_line()
         sout.write(self.command_line)
         sout.write('\n')
-        sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name
+        sout.write("Creating symlink\n")  # between the input (.dat) file and the given input file name
         os.symlink(self.opts.input, self.fastqinfilename)
         sout.write("check_call\n")
         subprocess.check_call(self.command_line, shell=True)
@@ -142,6 +140,7 @@
         sout.write("Finished")
         sout.close()
 
+
 if __name__ == '__main__':
     op = optparse.OptionParser()
     op.add_option('-i', '--input', default=None)
@@ -156,9 +155,9 @@
     op.add_option('-e', '--executable', default='fastqc')
     opts, args = op.parse_args()
 
-    assert opts.input != None
-    assert opts.inputfilename != None
-    assert opts.htmloutput != None
+    assert opts.input is not None
+    assert opts.inputfilename is not None
+    assert opts.htmloutput is not None
     if not os.path.exists(opts.outputdir):
         os.makedirs(opts.outputdir)