Commit message:
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository |
added:
fastqc/README fastqc/fastqc.py fastqc/fastqc.xml |
b |
diff -r 000000000000 -r 1d373f219445 fastqc/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastqc/README Tue Jun 07 17:22:05 2011 -0400 |
b |
@@ -0,0 +1,39 @@ + +FastQC +------ + +From the FastQC website http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/ + +Function A quality control tool for high throughput sequence data. +Language Java +Requirements A suitable Java Runtime Environment + The Picard BAM/SAM Libraries (included in download) +Code Maturity Stable. Mature code, but feedback is appreciated. +Code Released Yes, under GPL v3 or later. +Initial Contact Simon Andrews + +FastQC aims to provide a simple way to do some quality control checks on raw sequence data coming from high throughput sequencing pipelines. It provides a modular set of analyses which you can use to give a quick impression of whether your data has any problems of which you should be aware before doing any further analysis. + +The main functions of FastQC are: + +- Import of data from BAM, SAM or FastQ files (any variant) +- Providing a quick overview to tell you in which areas there may be problems +- Summary graphs and tables to quickly assess your data +- Export of results to an HTML based permanent report +- Offline operation to allow automated generation of reports without running the interactive application + +Download and installation information is at: http://www.bioinformatics.bbsrc.ac.uk/projects/download.html#fastqc + + +Galaxy Tool Wrapper +------------------- + +The galaxy tool wrapper for FastQC requires version: FastQC v0.7.2 + +FastQC should be downloaded and installed on the system on which it will be executed. +The PATH environment variable should include the directory in which the fastqc script resides. + +The fastqc.py wrapper invokes the fastqc script provided in FastQC download, +and converts the FastQC results into a Galaxy html formatted dataset. + + |
b |
diff -r 000000000000 -r 1d373f219445 fastqc/fastqc.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastqc/fastqc.py Tue Jun 07 17:22:05 2011 -0400 |
[ |
b'@@ -0,0 +1,167 @@\n+#!/usr/bin/env python\n+\n+"""\n+Runs FastQC on a fastq file;\n+TODO: more documentation\n+\n+usage: fastqc.py [options]\n+ -i, --input=i: The fastq input file\n+ -n, --name=n: The fastq input name\n+ -c, --contaminants=c: A contaminants file\n+ -r, --report=r: The html summary report file\n+ -D, --dir=D: The dir for report files\n+ -d, --data=d: The data output text file\n+"""\n+\n+import optparse, os, shutil, subprocess, sys, tempfile, re, string\n+\n+def stop_err( msg ):\n+ sys.stderr.write( \'%s\\n\' % msg )\n+ sys.exit()\n+\n+def __main__():\n+ #Parse Command Line\n+ parser = optparse.OptionParser()\n+ parser.add_option( \'-i\', \'--input\', dest=\'input\', help=\'The sequence input file\' )\n+ parser.add_option( \'-f\', \'--format\', dest=\'format\', help=\'The sequence input file format\' )\n+ parser.add_option( \'-n\', \'--name\', dest=\'name\', help=\'The fastq input name\' )\n+ parser.add_option( \'-c\', \'--contaminants\', dest=\'contaminants\', help=\'A contaminants file\' )\n+ parser.add_option( \'-r\', \'--report\', dest=\'report\', help=\'The HTML report\' )\n+ parser.add_option( \'-D\', \'--dir\', dest=\'outdir\', help=\'The dir for report files\' )\n+ parser.add_option( \'-d\', \'--data\', dest=\'data\', help=\'The output data text file\' )\n+ (options, args) = parser.parse_args()\n+ if options.input == None:\n+ stop_err("Misssing option --input")\n+ params = []\n+ #params.append(\'-Xmx250m\')\n+ params.append(\'-Djava.awt.headless=true\')\n+ name = \'input\'\n+ format = \'fastq\'\n+ if options.outdir != None:\n+ os.makedirs(options.outdir)\n+ if options.contaminants != None and options.contaminants != \'None\':\n+ params.append("-c %s" % options.contaminants)\n+ if options.name != None and options.name != \'None\':\n+ name = re.sub(\'[^a-zA-Z0-9_.-]\',\'_\',options.name)\n+ if options.format != None and options.format != \'None\':\n+ format = options.format\n+ params.append("-f %s" % options.format)\n+ # FastQC relies on the 
extension to determine file format .sam .bam or .fastq\n+ if not name.endswith(\'.\'+format):\n+ name = \'.\'.join((name,format))\n+ # make temp directory\n+ buffsize = 1048576\n+ tmp_dir = tempfile.mkdtemp()\n+ params.append("-o %s" % tmp_dir)\n+ # print("tmp_dir %s" % tmp_dir)\n+ try:\n+ # make a link to the input fastq in the tmp_dir\n+ # FastQC will generate output in the same dir that it finds its input\n+ fastq = os.path.join(tmp_dir,name) \n+ os.symlink( options.input, fastq)\n+ # generate commandline\n+ cmd = \'fastqc %s %s\' % (\' \'.join(params),fastq)\n+ # need to nest try-except in try-finally to handle 2.4\n+ try:\n+ try:\n+ tmp_stderr_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix=\'.err\' ).name\n+ tmp_stderr = open( tmp_stderr_name, \'wb\' )\n+ tmp_stdout_name = tempfile.NamedTemporaryFile( dir=tmp_dir,suffix=\'.out\' ).name\n+ tmp_stdout = open( tmp_stdout_name, \'wb\' )\n+ proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno(), stdout=tmp_stdout.fileno() )\n+ returncode = proc.wait()\n+ tmp_stderr.close()\n+ # get stderr, allowing for case where it\'s very large\n+ tmp_stderr = open( tmp_stderr_name, \'rb\' )\n+ stderr = \'\'\n+ try:\n+ while True:\n+ stderr += tmp_stderr.read( buffsize )\n+ if not stderr or len( stderr ) % buffsize != 0:\n+ break\n+ except OverflowError:\n+ pass\n+ tmp_stderr.close()\n+ if returncode != 0:\n+ raise Exception, stderr\n+ except Exception, e:\n+ raise Exception, \'Error executing FastQC. 
\' + str( e )\n+ # remove the input file symlink so it does get copied\n+ os.remo'..b' tests = []\n+ # move result to outdir \n+ # Need to flatten the dir hierachy in order for galaxy to serve the href links\n+ for root, dirs, files in os.walk(tmp_dir):\n+ for fname in files:\n+ path = os.path.join(root,fname)\n+ # print("%s" % fname)\n+ if re.match(\'.+\\.zip\',fname):\n+ pass\n+ elif fname == \'fastqc_report.html\':\n+ if options.outdir != None:\n+ fsrc = open(path,\'r\')\n+ # fdst = open(os.path.join(options.outdir,fname),\'w\')\n+ fdst = open(options.report,\'w\')\n+ try:\n+ for line in fsrc:\n+ if line.find(\'footer\') > 0:\n+ # add extra links in case someone prefers raw text\n+ fdst.write(\'<p><a href="summary.txt">FastQC Summary text report</a>\')\n+ fdst.write(\'<p><a href="fastqc_data.txt">FastQC Report Data</a>\')\n+ # copy lines removing subdirs from links\n+ fdst.write(re.sub(\'Icons/|Images/\',\'\',line))\n+ finally:\n+ fsrc.close()\n+ fdst.close() \n+ else:\n+ if options.outdir != None:\n+ shutil.copy(path,options.outdir)\n+ if fname == \'summary.txt\':\n+ # Use the contents of this file to put stdout info into the HistoryDataset panel\n+ fsrc = open(path,\'r\')\n+ try:\n+ for line in fsrc:\n+ (grade,test,seq) = string.split(line,\'\t\')\n+ tests.append("%s %s" % (\'+\' if grade == \'PASS\' else \'-\',re.sub(\'equence\',\'eq\',test)))\n+ finally:\n+ fsrc.close()\n+ elif fname == \'fastqc_data.txt\':\n+ if options.data != None:\n+ # copy the fastqc_data.txt file to the dataset data \n+ shutil.copy(path,options.data)\n+ cnt = \'?\'\n+ flen = \'?\'\n+ gc = \'?\'\n+ fsrc = open(path,\'r\')\n+ try:\n+ for line in fsrc:\n+ m = re.match(\'^Total Sequences\t(\\d+)\',line)\n+ if m:\n+ cnt = m.groups()[-1]\n+ m = re.match(\'^Sequence length\t(\\d+)\',line)\n+ if m:\n+ flen = m.groups()[-1]\n+ m = re.match(\'^%GC\t(\\d+)\',line)\n+ if m:\n+ gc = m.groups()[-1]\n+ finally:\n+ fsrc.close()\n+ #print to stdout so that this appears in the tool dataset info\n+ 
print("Seqs %s, Len %s, GC %s" %(cnt,flen,gc)) \n+ #print to stdout so that this appears in the tool dataset info\n+ print("%s" % \'\\n\'.join(tests))\n+ except Exception, e:\n+ stop_err( \'Fastq failed.\\n\' + str( e ) )\n+ finally:\n+ # clean up temp dir, put in a try block so we don\'t fail on stale nfs handles\n+ try: \n+ if os.path.exists( tmp_dir ):\n+ shutil.rmtree( tmp_dir )\n+ except Exception, e:\n+ pass\n+\n+if __name__=="__main__": __main__()\n' |
b |
diff -r 000000000000 -r 1d373f219445 fastqc/fastqc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastqc/fastqc.xml Tue Jun 07 17:22:05 2011 -0400 |
[ |
@@ -0,0 +1,94 @@ +<tool id="FastQC" name="FastQC" version="1.0.0"> + <description>quality control checks on raw sequence data</description> + <command interpreter="python">fastqc.py + #if $input.extension.startswith( "fastq"): + --format=fastq + #else + --format=$input.extension + #end if + --input='$input' + --name='$input.name' + --dir='$report.extra_files_path' + --report='$report' + #if $contaminants != None and $contaminants != "None" and $contaminants != "": + --contaminants=$contaminants + #end if + </command> + <inputs> + <param name="input" type="data" format="fastq,sam,bam" label="FASTQ reads" /> + <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminants" + help="Two fields per line separated by a TAB: name DNA_sequence. For example: Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA"/> + </inputs> + <outputs> + <data name="report" format="html" /> + </outputs> + <tests> + <!-- + <test> + <param name="input1_file" value="3.fastqsanger" ftype="fastqsanger" /> + <output name="output1_file" file="split_pair_reads_1.fastqsanger" /> + <output name="output2_file" file="split_pair_reads_2.fastqsanger" /> + </test> + --> + </tests> + <help> +**What it does** + +FastQC_ is a product of Bioinformatics Group at the Babraham Institute. FastQC aims to provide a simple way to do some quality control checks on raw sequence data coming from high throughput sequencing pipelines. It provides a modular set of analyses which you can use to give a quick impression of whether your data has any problems of which you should be aware before doing any further analysis. 
+ +The main functions of FastQC are:: + + - Import of data from BAM, SAM or FastQ files (any variant) + - Providing a quick overview to tell you in which areas there may be problems + - Summary graphs and tables to quickly assess your data + - Export of results to an HTML based permanent report + - Offline operation to allow automated generation of reports without running the interactive application + + +.. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/ + +----- + +**Input format** + +Any fastq file, for example:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758 + GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA + +HWI-EAS91_1_30788AAXX:7:21:1542:1758 + hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR + +**Contaminants format** + +An optional contaminant file (otherwise FastQC will use the default):: + + # This file contains a list of potential contaminants which are + # frequently found in high throughput sequencing reactions. These + # are mostly sequences of adapters / primers used in the various + # sequencing chemistries. + # + # You can add more sequences to the file by putting one line per entry + # and specifying a name[tab]sequence. If the contaminant you add is + # likely to be of use to others please consider sending it to the FastQ + # authors, either via a bug report at www.bioinformatics.bbsrc.ac.uk/bugzilla/ + # or by directly emailing simon.andrews@bbsrc.ac.uk so other users of + # the program can benefit. 
+ Illumina Single End Apapter 1 ACACTCTTTCCCTACACGACGCTGTTCCATCT + Illumina Single End Apapter 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT + Illumina Single End PCR Primer 1 AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT + Illumina Single End PCR Primer 2 CAAGCAGAAGACGGCATACGAGCTCTTCCGATCT + Illumina Single End Sequencing Primer ACACTCTTTCCCTACACGACGCTCTTCCGATCT + + +----- + +**Outputs** + +An HTML file with links to:: + + - fastqc_report.html + - summary.txt + - fastqc_data.txt + + </help> +</tool> |