# HG changeset patch
# User iuc
# Date 1452725855 18000
# Node ID 5fb45d8bbc075a671e4c6c6495f0ee3fbc362932
# Parent 2611a96c30b708268b0bcc27942cbb95c30acc7a
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastqc commit dd2bada7005c72c16cd3ea047cdc64896c1f8977
diff -r 2611a96c30b7 -r 5fb45d8bbc07 rgFastQC.py
--- a/rgFastQC.py Sat Jan 18 22:33:36 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,233 +0,0 @@
-"""
-# May 2013 ross added check for bogus gz extension - fastqc gets confused
-# added sanitizer for user supplied name
-# removed shell and make cl a sequence for Popen call
-# ross lazarus August 10 2012 in response to anon insecurity report
-wrapper for fastqc
-
-called as
-
- rgFastqc.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
-
-
-
-
-Current release seems overly intolerant of sam/bam header strangeness
-Author notified...
-
-
-"""
-import re
-import os
-import sys
-import subprocess
-import optparse
-import shutil
-import tempfile
-import zipfile
-import gzip
-
-def pathfind(program):
- """ toolshed path munging isn't so try to work around june 5 2013
- """
- def is_exe(fpath):
- return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
-
- fpath, fname = os.path.split(program)
- if fpath:
- if is_exe(program):
- return program
- else:
- for path in os.environ["PATH"].split(os.pathsep):
- path = path.strip('"')
- exe_file = os.path.join(path, program)
- if is_exe(exe_file):
- return exe_file
-
- return None
-
-class FastQC():
- """wrapper
- """
-
-
- def __init__(self,opts=None):
- assert opts <> None
- self.opts = opts
- fastqcexe = pathfind(opts.executable)
- assert (fastqcexe != None),'##rgFastQC.py error - cannot find passed fastqc executable %s in path %s' % (opts.executable,os.environ['PATH'])
- self.fastqcexe = fastqcexe
-
- def getFileString(self, fpath, outpath):
- """
- format a nice file size string
- """
- size = ''
- fp = os.path.join(outpath, fpath)
- s = fpath
- if os.path.isfile(fp):
- n = float(os.path.getsize(fp))
- if n > 2**20:
- size = ' (%1.1f MB)' % (n/2**20)
- elif n > 2**10:
- size = ' (%1.1f KB)' % (n/2**10)
- elif n > 0:
- size = ' (%d B)' % (int(n))
- s = '%s %s' % (fpath, size)
- return s
-
- def run_fastqc(self):
- """
- In batch mode fastqc behaves not very nicely - will write to a new folder in
- the same place as the infile called [infilebasename]_fastqc
- rlazarus@omics:/data/galaxy/test$ ls FC041_1_sequence_fastqc
- duplication_levels.png fastqc_icon.png per_base_n_content.png per_sequence_gc_content.png summary.txt
- error.png fastqc_report.html per_base_quality.png per_sequence_quality.png tick.png
- fastqc_data.txt per_base_gc_content.png per_base_sequence_content.png sequence_length_distribution.png warning.png
-
- """
- serr = ''
- dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir)
- sout = open(tlog, 'w')
- fastq = os.path.basename(self.opts.input)
- cl = [self.fastqcexe,'--outdir=%s' % self.opts.outputdir]
- if self.opts.informat in ['sam','bam']:
- cl.append('--f=%s' % self.opts.informat)
- if self.opts.contaminants <> None :
- cl.append('--contaminants=%s' % self.opts.contaminants)
- # patch suggested by bwlang https://bitbucket.org/galaxy/galaxy-central/pull-request/30
- # use a symlink in a temporary directory so that the FastQC report reflects the history input file name
- # note this exposes a bug in the EBI_SRA download tool which leaves bogus .gz extensions on uncompressed files
- # which fastqc helpfully tries to uncompress again - hilarity ensues.
- # patched may 29 2013 until this is fixed properly
- infname = self.opts.inputfilename
- linf = infname.lower()
- trimext = False
- if ( linf.endswith('.gz') or linf.endswith('.gzip') ):
- f = gzip.open(self.opts.input)
- try:
- testrow = f.readline()
- except:
- trimext = True
- f.close()
- elif linf.endswith('bz2'):
- f = bz2.open(self.opts.input,'rb')
- try:
- f.readline()
- except:
- trimext = True
- f.close()
- elif linf.endswith('.zip'):
- if not zipfile.is_zipfile(self.opts.input):
- trimext = True
- if trimext:
- infname = os.path.splitext(infname)[0]
- fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
- link_name = os.path.join(self.opts.outputdir, fastqinfilename)
- os.symlink(self.opts.input, link_name)
- cl.append(link_name)
- sout.write('# FastQC cl = %s\n' % ' '.join(cl))
- sout.flush()
- p = subprocess.Popen(cl, shell=False, stderr=sout, stdout=sout, cwd=self.opts.outputdir)
- retval = p.wait()
- sout.close()
- runlog = open(tlog,'r').readlines()
- os.unlink(link_name)
- flist = os.listdir(self.opts.outputdir) # fastqc plays games with its output directory name. eesh
- odpath = None
- for f in flist:
- d = os.path.join(self.opts.outputdir,f)
- if os.path.isdir(d):
- if d.endswith('_fastqc'):
- odpath = d
- hpath = None
- if odpath <> None:
- try:
- hpath = os.path.join(odpath,'fastqc_report.html')
- rep = open(hpath,'r').readlines() # for our new html file but we need to insert our stuff after the
tag
- except:
- pass
- if hpath == None:
- serr = '\n'.join(runlog)
- res = ['## odpath=%s: No output found in %s. Output for the run was:Files created by FastQC
\n']
- flist.sort()
- for i,f in enumerate(flist):
- if not(os.path.isdir(f)):
- fn = os.path.split(f)[-1]
- res.append('%s |
\n' % (fn,self.getFileString(fn, self.opts.outputdir)))
- res.append('
\n')
- res.append('
FastQC documentation and full attribution is here
\n')
- res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n
')
- res.append(footer)
- fixed = rep[:bodyindex] + res + rep[bodyindex:]
- return fixed # with our additions
-
-
- def fix_fastqcimages(self,odpath):
- """ Galaxy wants everything in the same files_dir
- """
- icpath = os.path.join(odpath,'Icons')
- impath = os.path.join(odpath,'Images')
- for adir in [icpath,impath,odpath]:
- if os.path.exists(adir):
- flist = os.listdir(adir) # get all files created
- for f in flist:
- if not os.path.isdir(os.path.join(adir,f)):
- sauce = os.path.join(adir,f)
- dest = os.path.join(self.opts.outputdir,f)
- shutil.move(sauce,dest)
- os.rmdir(adir)
-
-
-if __name__ == '__main__':
- op = optparse.OptionParser()
- op.add_option('-i', '--input', default=None)
- op.add_option('-j', '--inputfilename', default=None)
- op.add_option('-o', '--htmloutput', default=None)
- op.add_option('-d', '--outputdir', default="/tmp/shortread")
- op.add_option('-f', '--informat', default='fastq')
- op.add_option('-n', '--namejob', default='rgFastQC')
- op.add_option('-c', '--contaminants', default=None)
- op.add_option('-e', '--executable', default='fastqc')
- opts, args = op.parse_args()
- assert opts.input <> None
- if not os.path.exists(opts.outputdir):
- os.makedirs(opts.outputdir)
- f = FastQC(opts)
- html,retval,serr = f.run_fastqc()
- f = open(opts.htmloutput, 'w')
- f.write(''.join(html))
- f.close()
- if retval <> 0:
- print >> sys.stderr, serr # indicate failure
-
-
-
diff -r 2611a96c30b7 -r 5fb45d8bbc07 rgFastQC.xml
--- a/rgFastQC.xml Sat Jan 18 22:33:36 2014 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,102 +0,0 @@
-