Repository 'fastqc'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/fastqc

Changeset 0:e28c965eeed4 (2014-01-27)
Next changeset 1:8fae48caaf06 (2014-11-11)
Commit message:
Imported from capsule None
added:
rgFastQC.py
rgFastQC.xml
test-data/1000gsample.fastq
test-data/fastqc_contaminants.txt
test-data/fastqc_report.html
tool_dependencies.xml
b
diff -r 000000000000 -r e28c965eeed4 rgFastQC.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgFastQC.py Mon Jan 27 09:29:14 2014 -0500
[
b'@@ -0,0 +1,216 @@\n+"""\n+# May 2013 ross added check for bogus gz extension - fastqc gets confused\n+# added sanitizer for user supplied name\n+# removed shell and make cl a sequence for Popen call\n+# ross lazarus August 10 2012 in response to anon insecurity report\n+wrapper for fastqc\n+\n+called as\n+  <command interpreter="python">\n+    rgFastqc.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"\n+  </command>\n+\n+\n+\n+Current release seems overly intolerant of sam/bam header strangeness\n+Author notified...\n+\n+\n+"""\n+import re\n+import os\n+import sys\n+import subprocess\n+import optparse\n+import shutil\n+import tempfile\n+import zipfile\n+import gzip\n+\n+\n+def getFileString(fpath, outpath):\n+    """\n+    format a nice file size string\n+    """\n+    size = \'\'\n+    fp = os.path.join(outpath, fpath)\n+    s = \'? ?\'\n+    if os.path.isfile(fp):\n+        n = float(os.path.getsize(fp))\n+        if n > 2**20:\n+            size = \' (%1.1f MB)\' % (n/2**20)\n+        elif n > 2**10:\n+            size = \' (%1.1f KB)\' % (n/2**10)\n+        elif n > 0:\n+            size = \' (%d B)\' % (int(n))\n+        s = \'%s %s\' % (fpath, size) \n+    return s\n+\n+\n+class FastQC():\n+    """wrapper\n+    """\n+    \n+    \n+    def __init__(self,opts=None):\n+        assert opts <> None\n+        self.opts = opts\n+        \n+        \n+    def run_fastqc(self):\n+        """\n+        In batch mode fastqc behaves not very nicely - will write to a new folder in\n+        the same place as the infile called [infilebasename]_fastqc\n+    rlazarus@omics:/data/galaxy/test$ ls FC041_1_sequence_fastqc\n+    duplication_levels.png  fastqc_icon.png          per_base_n_content.png         per_sequence_gc_content.png       summary.txt\n+    error.png               fastqc_report.html       per_base_quality.png           per_sequence_quality.png          tick.png\n+    fastqc_data.txt         per_base_gc_content.png  
per_base_sequence_content.png  sequence_length_distribution.png  warning.png\n+\n+        """\n+        serr = \'\'\n+        dummy,tlog = tempfile.mkstemp(prefix=\'rgFastQC\',suffix=".log",dir=self.opts.outputdir)\n+        sout = open(tlog, \'w\')\n+        fastq = os.path.basename(self.opts.input)\n+        cl = [self.opts.executable,\'--outdir=%s\' % self.opts.outputdir]\n+        if self.opts.informat in [\'sam\',\'bam\']:\n+            cl.append(\'--f=%s\' % self.opts.informat)\n+        if self.opts.contaminants <> None :\n+            cl.append(\'--contaminants=%s\' % self.opts.contaminants)\n+        # patch suggested by bwlang https://bitbucket.org/galaxy/galaxy-central/pull-request/30\n+        # use a symlink in a temporary directory so that the FastQC report reflects the history input file name\n+        infname = self.opts.inputfilename\n+        linf = infname.lower()\n+        trimext = False\n+        # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf\n+        # patched may 29 2013 until this is fixed properly\n+        if ( linf.endswith(\'.gz\') or linf.endswith(\'.gzip\') ): \n+            f = gzip.open(self.opts.input)\n+            try:\n+                testrow = f.readline()\n+            except:\n+                trimext = True\n+            f.close()\n+        elif linf.endswith(\'bz2\'):\n+            f = bz2.open(self.opts.input,\'rb\')\n+            try:\n+                f.readline()\n+            except:\n+                trimext = True\n+            f.close()\n+        elif linf.endswith(\'.zip\'):\n+            if not zipfile.is_zipfile(self.opts.input):\n+                trimext = True\n+        if trimext:\n+            infname = os.path.splitext(infname)[0]\n+        fastqinfilename = re.sub(ur\'[^a-zA-Z0-9_\\-\\.]\', \'_\', os.path.basename(infname))\n+        link_name = os.path.join(self.opts.outputdir, fastqinfilename)\n+        os.symlink(self.opts.input, link_name)\n+        
cl.append(link_name)        \n+        sout.write(\'# FastQC cl = %s\\n\' % \' \'.join(cl))\n+        sout.flush()\n+        p = subprocess.Popen(cl, shell=False, stderr=sout, stdout=sout, c'..b']\n+            res += runlog\n+            res += [\'</pre>\\n\',\n+                   \'Please read the above for clues<br/>\\n\',\n+                   \'If you selected a sam/bam format file, it might not have headers or they may not start with @HD?<br/>\\n\',\n+                   \'It is also possible that the log shows that fastqc is not installed?<br/>\\n\',\n+                   \'If that is the case, please tell the relevant Galaxy administrator that it can be snarfed from<br/>\\n\',\n+                   \'http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/<br/>\\n\',]\n+            return res,1,serr\n+        self.fix_fastqcimages(odpath)\n+        flist = os.listdir(self.opts.outputdir) # these have now been fixed\n+        excludefiles = [\'tick.png\',\'warning.png\',\'fastqc_icon.png\',\'error.png\']\n+        flist = [x for x in flist if not x in excludefiles]\n+        for i in range(len(rep)): # need to fix links to Icons and Image subdirectories in lastest fastqc code - ugh\n+            rep[i] = rep[i].replace(\'Icons/\',\'\')\n+            rep[i] = rep[i].replace(\'Images/\',\'\')\n+\n+        html = self.fix_fastqc(rep,flist,runlog)\n+        return html,retval,serr\n+        \n+\n+        \n+    def fix_fastqc(self,rep=[],flist=[],runlog=[]):\n+        """ add some of our stuff to the html\n+        """\n+        bodyindex = len(rep) -1  # hope they don\'t change this\n+        footrow = bodyindex - 1 \n+        footer = rep[footrow]\n+        rep = rep[:footrow] + rep[footrow+1:]\n+        res = [\'<div class="module"><h2>Files created by FastQC</h2><table cellspacing="2" cellpadding="2">\\n\']\n+        flist.sort()\n+        for i,f in enumerate(flist):\n+            if not(os.path.isdir(f)):\n+                fn = os.path.split(f)[-1]\n+            
    res.append(\'<tr><td><a href="%s">%s</a></td></tr>\\n\' % (fn,getFileString(fn, self.opts.outputdir)))\n+        res.append(\'</table>\\n\') \n+        res.append(\'<a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC documentation and full attribution is here</a><br/><hr/>\\n\')\n+        res.append(\'FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://bitbucket.org/rgenetics for details and licensing\\n</div>\')\n+        res.append(footer)\n+        fixed = rep[:bodyindex] + res + rep[bodyindex:]\n+        return fixed # with our additions\n+\n+\n+    def fix_fastqcimages(self,odpath):\n+        """ Galaxy wants everything in the same files_dir\n+        """\n+        icpath = os.path.join(odpath,\'Icons\')\n+        impath = os.path.join(odpath,\'Images\')\n+        for adir in [icpath,impath,odpath]:\n+            if os.path.exists(adir):\n+                flist = os.listdir(adir) # get all files created\n+                for f in flist:\n+                    if not os.path.isdir(os.path.join(adir,f)):\n+                        sauce = os.path.join(adir,f)\n+                        dest = os.path.join(self.opts.outputdir,f)\n+                        shutil.move(sauce,dest)\n+                os.rmdir(adir)\n+\n+    \n+\n+if __name__ == \'__main__\':\n+    op = optparse.OptionParser()\n+    op.add_option(\'-i\', \'--input\', default=None)\n+    op.add_option(\'-j\', \'--inputfilename\', default=None)    \n+    op.add_option(\'-o\', \'--htmloutput\', default=None)\n+    op.add_option(\'-d\', \'--outputdir\', default="/tmp/shortread")\n+    op.add_option(\'-f\', \'--informat\', default=\'fastq\')\n+    op.add_option(\'-n\', \'--namejob\', default=\'rgFastQC\')\n+    op.add_option(\'-c\', \'--contaminants\', default=None)\n+    op.add_option(\'-e\', \'--executable\', default=\'fastqc\')\n+    opts, args = op.parse_args()\n+    assert opts.input <> None\n+    assert os.path.isfile(opts.executable),\'##rgFastQC.py error - 
cannot find executable %s\' % opts.executable\n+    if not os.path.exists(opts.outputdir): \n+        os.makedirs(opts.outputdir)\n+    f = FastQC(opts)\n+    html,retval,serr = f.run_fastqc()\n+    f = open(opts.htmloutput, \'w\')\n+    f.write(\'\'.join(html))\n+    f.close()\n+    if retval <> 0:\n+        print >> sys.stderr, serr # indicate failure\n+         \n+    \n+\n'
b
diff -r 000000000000 -r e28c965eeed4 rgFastQC.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rgFastQC.xml Mon Jan 27 09:29:14 2014 -0500
b
@@ -0,0 +1,101 @@
+<tool name="FastQC:Read QC" id="fastqc" version="0.52">
+  <description>reports using FastQC</description>
+  <command interpreter="python">
+    rgFastQC.py -i "$input_file" -d "$html_file.files_path" -o "$html_file" -n "$out_prefix" -f "$input_file.ext" -j "$input_file.name" -e "\$JAVA_JAR_PATH/fastqc"
+#if $contaminants.dataset and str($contaminants) > ''
+-c "$contaminants"
+#end if
+  </command>
+  <requirements>
+    <requirement type="package" version="0.10.1">FastQC</requirement>
+  </requirements>
+  <inputs>
+    <param format="fastqsanger,fastq,bam,sam" name="input_file" type="data" label="Short read data from your current history" />
+    <param name="out_prefix" value="FastQC" type="text" label="Title for the output file - to remind you what the job was for" size="80"
+      help="Letters and numbers only please - other characters will be removed">
+    <sanitizer invalid_char="">
+        <valid initial="string.letters,string.digits"/>
+    </sanitizer>
+    </param>
+    <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" 
+           help="tab delimited file with 2 columns: name and sequence.  For example: Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA"/>
+  </inputs>
+  <outputs>
+    <data format="html" name="html_file"  label="${out_prefix}_${input_file.name}.html" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="input_file" value="1000gsample.fastq" />
+      <param name="out_prefix" value="fastqc_out" />
+      <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
+      <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+FastQC aims to provide a simple way to do some quality control checks on raw
+sequence data coming from high throughput sequencing pipelines. 
+It provides a modular set of analyses which you can use to give a quick
+impression of whether your data has any problems of 
+which you should be aware before doing any further analysis.
+
+The main functions of FastQC are:
+
+- Import of data from BAM, SAM or FastQ files (any variant)
+- Providing a quick overview to tell you in which areas there may be problems
+- Summary graphs and tables to quickly assess your data
+- Export of results to an HTML based permanent report
+- Offline operation to allow automated generation of reports without running the interactive application
+
+
+-----
+
+
+.. class:: infomark
+
+**FastQC**
+
+This is a Galaxy wrapper. It merely exposes the external package FastQC_, which is documented at the FastQC_ website.
+Kindly acknowledge it as well as this tool if you use it.
+FastQC incorporates the Picard-tools_ libraries for sam/bam processing.
+
+The contaminants file parameter was borrowed from the independently developed
+fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson.
+
+-----
+
+.. class:: infomark
+
+**Inputs and outputs**
+
+FastQC_ is the best place to look for documentation - it's very good. 
+A summary follows below for those in a tearing hurry.
+
+This wrapper will accept a Galaxy fastq, sam or bam as the input read file to check.
+It will also take an optional file containing a list of contaminant sequences, in the form of
+a tab-delimited file with 2 columns, name and sequence.
+
+The tool produces a single HTML output file that contains all of the results, including the following:
+
+- Basic Statistics
+- Per base sequence quality
+- Per sequence quality scores
+- Per base sequence content
+- Per base GC content
+- Per sequence GC content
+- Per base N content
+- Sequence Length Distribution
+- Sequence Duplication Levels
+- Overrepresented sequences
+- Kmer Content
+
+All except Basic Statistics and Overrepresented sequences are plots.
+ .. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/
+ .. _Picard-tools: http://picard.sourceforge.net/index.shtml
+
+</help>
+</tool>
b
diff -r 000000000000 -r e28c965eeed4 test-data/1000gsample.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1000gsample.fastq Mon Jan 27 09:29:14 2014 -0500
b
b"@@ -0,0 +1,20000 @@\n+@ERR005448.1 IL40_2446:5:1:0:964/2\n+GTCTCAGCTGCTTAGTCCAATGGCTCCCACACACACAGAAGCATGGCCAGCTCT\n++\n+40539<=3<.<@@697;A=;79:9<9974.;9:8@=551%0)5+.2%*,02*0)\n+@ERR005448.2 IL40_2446:5:1:0:451/2\n+ATAAAGACCAGCCTGGCCAATGTGGTGAAACCCTGTCTCTACTAAAAGTACAAA\n++\n+>?A?<8=AB?:<B<:9;ABB=<78458:?@<@@<68@@BA><>@@>8/3>9=;?\n+@ERR005448.3 IL40_2446:5:1:0:1205/2\n+ATTCTACTCCAACTGATTCCATTCCATTCTATTCCTTTCTATTCCATTTCATTC\n++\n+?:B7C;6<<;B6/7.09=05973.7864./288235A3&&/244,.056.322.\n+@ERR005448.4 IL40_2446:5:1:0:17/2\n+CACTTCCAGATACTCCAAAAAGAGTGTTTCCAACCTGCTCTAGTAATGGGAATG\n++\n+BBBA=<=B6ABA<BB8>>@7=?>44>8;88=@=6B9??;B=6;8<:BB?@9=<A\n+@ERR005448.5 IL40_2446:5:1:0:719/2\n+ATTCCATCCTTGCAGGTGCACAGCTTTTGTTGCTTCTGCCCTTCCTCTCGCCCT\n++\n+@>@>AA99975,382146;94:2009>5&.8-54;63-005896:3040+3343\n+@ERR005448.6 IL40_2446:5:1:0:156/2\n+CAGGAAAGGGCCTGGTGGGAGGTAATTGGATCATGGGGGTGGTTTCCTCCATGC\n++\n+<:<=;A=4=A?@?4>>?0=84=?AAA>@:8BB<;=09;?'?;4??:A@>@?A>;\n+@ERR005448.8 IL40_2446:5:1:0:1162/2\n+GCTTATCAACAGAAAGCAAGGGTTAGCTATACTGACTTTAGACAGAATAGATTT\n++\n+0=;=;8*<;7;=8A804848)15?:59?=090945?<?<789,+;<5'6)8595\n+@ERR005448.9 IL40_2446:5:1:0:257/2\n+ATGGAATCAACATCAAACGGAATAAAACGGAACTATCGAATGGAATCGAAGAGA\n++\n+@-+6:7<::9<>?;7:;>=59:>78:<:<4@9*?@=:>8@A8894=7;3.3618\n+@ERR005448.10 IL40_2446:5:1:0:842/2\n+TGGATTCCACTCCATTCCATTGCACTCGCGTTGATTCCATTCCATTCCACTCCC\n++\n+>81>?>5>?=@?>@>?>=9;>79=6122,/&6'+,0'50)6&606-)5%%5)'+\n+@ERR005448.11 IL40_2446:5:1:0:947/2\n+AAAAAAAAAAAGCTGGGAGTGGGGTGTGGTGATGAGCGCCTGTAGTCCTAGCTA\n++\n+AAAAAAAA?=>:8?:;539)=5><6511)347;674040443642-)8/6348:\n+@ERR005448.12 IL40_2446:5:1:0:389/2\n+CAGAGATGTTTAATATGAGCCAGGAATTCTGCTAGGCACTTTGTGTATGTTTTA\n++\n+@?559>@76>CCC@CC;A80=B>7?B?=<;><B?66;A=BC>>;>6A><;?B>>\n+@ERR005448.13 IL40_2446:5:1:0:435/2\n+ACTGAATTTGGAAAGGGTCACTAGTGAACTTTTTTTTGGTGATACTAAGTAATT\n++\n+89=7)?>C;8997/456>ABA8=588=4858@@>B>?0(6<9<?;<BB77;68<\n+@ERR005448.14 
IL40_2446:5:1:0:552/2\n+CTTTGAGGGTGGGAAGGAATCCTGCCTCTAATTGCAGCTGTCACACCCCATCAG\n++\n+AA?/*<9=<'99=9<=7@A===2'2=8==449?;;9(0*23>7,34%586:;==\n+@ERR005448.15 IL40_2446:5:1:0:770/2\n+AAAAAAGTGCTGTTGGACAACAGTAAAAATTGAGTCTGTGTATATTGAGCTGAA\n++\n+>>>60298:,:35?5,399;-9-19:4)8728460%2;5./.35)8332&5125\n+@ERR005448.16 IL40_2446:5:1:0:788/2\n+AATGGCTTTGACCAAAATGCTGATAGTAATATGGACAATGATGTCGAGGCTGAG\n++\n+>>>6;:AB8)6;=?=??=8<@=9;9;8:=9;854;9?;9748488752755707\n+@ERR005448.17 IL40_2446:5:1:0:813/2\n+CGGAGTTTGAAAACACTTGAACTGATCTATACTCAATGAGAGGAGGTCATTGAT\n++\n+<;3'6<BA=A=A@@ABBA>BBB?=<B>C=@BA>>C@>B=<@A<>:;,7/.>710\n+@ERR005448.18 IL40_2446:5:1:0:1012/2\n+TGCCTACAATAGCAAATTTTGAAAAACATAGAGACCAAAATAAACCATAGTTAA\n++\n+>;4?4=??@B7'8>AA>?@;8=C?C;;:>=,=559*<D=9<>E<9<?8B4>;85\n+@ERR005448.19 IL40_2446:5:1:0:1150/2\n+TTATATGCAAGCGAAGAAGGTAAGTAGGTTACTGGATTTATGAGCCTGAGGTCA\n++\n+>=?AB8<:8438><=48;921<<00:3:69798)879:;;37<8639594/,0,\n+@ERR005448.20 IL40_2446:5:1:0:1254/2\n+TGTCTCTTCTCTCTTGAATTCCTGATCTGGCCTAAATTCGCACATACATTCTTT\n++\n+95<AC=BB@C@AAC??<AAA:==8;?@:.,8??:=86:85-:6>?<<@C?;A8;\n+@ERR005448.21 IL40_2446:5:1:0:1669/2\n+CAGCACCTGCTTTGTTGGCCTGGGGATGTTTCCTAGGTGGTTTATAGGTGGTTT\n++\n+8>3<A?36,8>@616<3199:6:730:4%6@9964,.&20'@52@6,16)/'.3\n+@ERR005448.22 IL40_2446:5:1:0:178/2\n+TCAGAGAGCCTTGGGCCAGAGTTCCAGGGTCTCTGGCAGTGGCTGCCAGGTGAG\n++\n+)<806'29<<A770.<@?<?80A6A858.'9?7:4,'0'.1>.6?2%%+121''\n+@ERR005448.23 IL40_2446:5:1:0:57/2\n+ATGATGGTAGAGAGGACATTGTCAAAGATGCCAAAATTTTAGACTCATTGGGAG\n++\n+A:3??AA9>>>B99;>ABCC>>C?B=0:BA=:ABB>@99?B;39B?79;06926\n+@ERR005448.24 IL40_2446:5:1:0:971/2\n+AAAATAATTCCCATTGCTATTTCTCGTGGATAGCATTAGAGCATCTGAATTTGG\n++\n+AAB@>ACCCA>@B=<1:C>;=?>@=>6-18<4.287:6.0/4<:65-.::=6+1\n+@ERR005448.25 IL40_2446:5:1:0:685/2\n+TTCCATTCCACACGGGTTGGTTCCCTTGCATCCCTTTCCATTCCATTCTATTGC\n++\n+:?A@@AAA?@@@95<:79393=:6<==4;<?7>0;9>973;4<352638%8724\n+@ERR005448.26 
IL40_2446:5:1:0:311/2\n+GCTGACTGATAGGCCCCACTCGCGGTAGGGTGCTTCTAGCCCTTCCTACTGATA\n++\n+=?99?=<><?>33==?=A@=<6@74752277=4=<3<:(7977<7<78582:97\n+@ERR005448.27 IL40_2446:5:1:0:528/2\n+AGACTGGATTAAGAAAATGTGGCACATATACACCATGGAATACTATGCAGCCAT\n++\n+BACB<.;;?C@:=ABC@891:6<AAAB=3>@?8;<79=9>=<??<@9-7-6<;;\n+@ERR005448.28 IL40_2446:5:1:0:"..b"3&/-'+++/6/',1.,\n+@ERR005448.1075769 IL40_2446:5:10:1190:4/1\n+GCCGCAGCTCACATTTTGATATGGCTCAGGAAAGAGAAGCCTTGCTAAAGAAGC\n++\n+@@>A?9>A?=0=2?AAA=8<<;??@=8(:85/,659513;705795&-)5-3/4\n+@ERR005448.1076602 IL40_2446:5:10:1203:4/1\n+TGTTTATAGCAGTACAATTCATGATTGCAAAGATATGGAACAAACCTAATTGCC\n++\n+AC@C?:=B?>8<)9=?CA8;1<?;=9@@;8@=:<:)2;,<><21947239/95A\n+@ERR005448.1077152 IL40_2446:5:10:1211:6/1\n+GAACTCTACGGGAACTATGGCTTCCACCTGCATGGTATCTCTTAGGTACAGATC\n++\n+9099?A@A82<?9956:7776:422*/7,&,200/+.33531.&,6-/.51+&-\n+@ERR005448.1077221 IL40_2446:5:10:1212:900/1\n+AGAGGTGATGAAGTCAAACCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n++\n+<>.8><2'72:@?-@><7<>:><?@@B@@AA>@BBA@ABB?@>@A@?@=<?@;B\n+@ERR005448.1077313 IL40_2446:5:10:1213:6/1\n+CATCTCGTAAGTTTCTCTTCCCCTGTGATGCCCATTTTTCTTGTTTTTTTTTTT\n++\n+?BAB?@A9>9A9CCCC@CCCCBCAA>B>@C@<<0=A?A?@A@A;BCACACA@=<\n+@ERR005448.1077487 IL40_2446:5:10:1216:5/1\n+GCAATGGAATGGTGAAAAGAAATGTCAGCTGCGACTGGGCCACTGCACTCCAGC\n++\n+B=1@A?;07@BA;A85::<?=?<@=84@787&56533)71335254.,0/2&..\n+@ERR005448.1077616 IL40_2446:5:10:1218:2/1\n+TTTGACATGGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGTAGTGGTACAATCTC\n++\n+@@BB@?1>BB<?@AABB@>BB@@BBB@A=BB=BB;=>7>==:4=A885=<>859\n+@ERR005448.1078324 IL40_2446:5:10:1230:4/1\n+CAAATCCAAGTGAGAACTGCCTTGTTAGAGGAAAGGCTGTTTTCTACCTATGGA\n++\n+@:40?BB=<A?A=@<8=:>@>A?B<?=>7>?709>>;8;-.584.&/4,)023&\n+@ERR005448.1078507 IL40_2446:5:10:1233:2/1\n+GATTCAATCACCTCCCACTAGGCCCCTCCTCCAGCACTGGGGATTACAATTCAG\n++\n+?77.=:5;</;?AA?;::5597/:??=;7->4&775711/71*.30233656-)\n+@ERR005448.1078651 
IL40_2446:5:10:1235:1070/1\n+TAAACAAAAGGCATGAAGCTCTAAGCCAGTGGGCTTGAATTTTCTCCACCATAA\n++\n+?8>AB?<84?CAB@;;9AA;AA;?9?@@B7><>>AC@;1==B>@A?A=@<47(2\n+@ERR005448.1078696 IL40_2446:5:10:1236:6/1\n+CTGAGCTGGAGTTCTACGAATCATGGTCTGGAGATGAAAACTGGGAAGACAAAG\n++\n+AB>;@A?@?7>?BB>9<>:8>@<=?@:AB@>4=::@52/56<><;0+8/46),7\n+@ERR005448.1078883 IL40_2446:5:10:1239:6/1\n+GCTTCAGTCTCCCTTGAAATGTAAATATGCAGGAAGATAGCCTTTCGGTTTTCC\n++\n+BA:>AB:@B@@B@?;;;7;:@>:899<B>>A95,-777,25;,05-;,&58:57\n+@ERR005448.1079004 IL40_2446:5:10:1241:4/1\n+GTGCTTATCATCTTAGAGAACACGTGTATCATTATGAACAGAATGATGCTAGAA\n++\n+C9CCA8'9@??ACA=A9A>;?=>C;@@BC>=@@@@@7@86=<=;C?=A=;5?1<\n+@ERR005448.1079882 IL40_2446:5:10:1255:5/1\n+AATGGAATGGAATGGAATGGAATCACCTCTATTGCAATCGAATGGAATGGAATG\n++\n+9;AB?87@B;AACAA>?BCB>9?C=?ABBB>C@CA?>CB9(4?A>><@@:;;;@\n+@ERR005448.1080580 IL40_2446:5:10:1267:4/1\n+GATTAACTATGATGACTGTTTTGTTCACAAGGTGGACATTTTTAACTGTGAAAA\n++\n+@:=A<9=?7?>3:A9=C@;=?B@::@?=7/<<6=;807296>:3158=4>57.,\n+@ERR005448.1080835 IL40_2446:5:10:1271:3/1\n+CACGTTTATTGCGGCACTATTCACAATAGCAAAGACTTGGCACCAACCCAAACG\n++\n+?37A;?>A7;9;-A=1;=;;5@@>@B=1;/979>6>69:8(:>932A@>7/)&3\n+@ERR005448.1081009 IL40_2446:5:10:1273:4/1\n+TGAAGAATGTCAATGGTAGTTTGATGGGAATAGCATTGAATCTATGAATTTCTT\n++\n+CC=+A5=?B6@A8CBC7<;2>>B4@B@;<6:7@94:8=337:;1<7756::77B\n+@ERR005448.1081178 IL40_2446:5:10:1276:6/1\n+TAGGGTCATTATAGATATAGTTAGATAAGATGAGGTCATATTGAAGCAGGATGG\n++\n+<+;BC7A:@??=><5A=ACB7B>CB<@CCAAC2C?.:B>9;A<-8;372=0458\n+@ERR005448.1081342 IL40_2446:5:10:1279:4/1\n+AGGGGAGTAGGCTTACTTTGTATCTGACCCACGTTTCCTCGAGACAAAACACCC\n++\n+>=BA==='<<ABA?<@AAA@@98B=<@<@:6=@54?B?1??/=,176699650(\n+@ERR005448.1081475 IL40_2446:5:10:1281:6/1\n+CATTCCGGGTCACTTTTTCCTGTCCTTTTGAGGCCAGAAATGCATATTGTAATG\n++\n+>6>@BBBBB7BB<=ABBBB@@B(278B7@@9<4;A>@448-@A(=(7@88;78;\n+@ERR005448.1081577 IL40_2446:5:10:1282:3/1\n+AGGGAGCCTCTCATGAGAGAAACACAGGGAAGGTGGGAAGGGAGATCCTGGGCT\n++\n+;5@BBBAB@<:<=:7:6@A@0@@A=7>9?<<03.<>5-(;<?3570'2,@><76\n+@ERR005448.1081665 
IL40_2446:5:10:1284:5/1\n+GACCAATAAGGAAGAAAAGAGAGAAGAATCAAATAGACGCATTAAAAAATGATA\n++\n+.=@/<;?59>=66?739=?;:/=79?<4,994<94@878/96111.32711175\n+@ERR005448.1081931 IL40_2446:5:10:1288:2/1\n+AGTGATGAACGGATAAGCGAAGTGTGCTACATATAACAATGGAATATTATCCAA\n++\n+@CC><635>94::=>5AB<19B>AC<);B::>BB@C<C?=B96.;:?4:6)'&5\n+@ERR005448.1081992 IL40_2446:5:10:1289:600/2\n+TGCTCTTCTGTTTGCTCTTCTGTTTACCATTCTCTTCTTCTGGGATTTCTACTG\n++\n+BACC@ABBA?@CC?CCACBCCC@BCBABABABA?ACA?CBCA@?AA@CBBA@@:\n+@ERR005448.1082627 IL40_2446:5:10:1299:6/1\n+CTTAGGCATTTTGAAGTTCTTGTGTGCCAGAGCTGTGGCTGGGGGTTTGTCTCA\n++\n+>@;;?A:7=AAAB<<B<=AAB@A;0?BBAB><'=A49@@@BB@A,32;:(6-3/\n"
b
diff -r 000000000000 -r e28c965eeed4 test-data/fastqc_contaminants.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqc_contaminants.txt Mon Jan 27 09:29:14 2014 -0500
[
b"@@ -0,0 +1,170 @@\n+# This file contains a list of potential contaminants which are\r\n+# frequently found in high throughput sequencing reactions.  These\r\n+# are mostly sequences of adapters / primers used in the various\r\n+# sequencing chemistries.\r\n+# \r\n+# Please DO NOT rely on these sequences to design your own oligos, some\r\n+# of them are truncated at ambiguous positions, and none of them are\r\n+# definitive sequences from the manufacturers so don't blame us if you\r\n+# try to use them and they don't work.\r\n+#\r\n+# You can add more sequences to the file by putting one line per entry\r\n+# and specifying a name[tab]sequence.  If the contaminant you add is \r\n+# likely to be of use to others please consider sending it to the FastQ\r\n+# authors, either via a bug report at www.bioinformatics.bbsrc.ac.uk/bugzilla/\r\n+# or by directly emailing simon.andrews@bbsrc.ac.uk so other users of\r\n+# the program can benefit.\r\n+\r\n+Illumina Single End Apapter 1\t\t\t\t\tACACTCTTTCCCTACACGACGCTGTTCCATCT\r\n+Illumina Single End Apapter 2\t\t\t\t\tCAAGCAGAAGACGGCATACGAGCTCTTCCGATCT\r\n+Illumina Single End PCR Primer 1\t\t\t\tAATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Single End PCR Primer 2\t\t\t\tCAAGCAGAAGACGGCATACGAGCTCTTCCGATCT\r\n+Illumina Single End Sequencing Primer\t\t\tACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+\r\n+Illumina Paired End Adapter 1\t\t\t\t\tACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Paired End Adapter 2\t\t\t\t\tCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT\r\n+Illumina Paried End PCR Primer 1\t\t\t\tAATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Paired End PCR Primer 2\t\t\t\tCAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT\r\n+Illumina Paried End Sequencing Primer 1\t\t\tACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Paired End Sequencing Primer 2\t\t\tCGGTCTCGGCATTCCTACTGAACCGCTCTTCCGATCT\r\n+\r\n+Illumina DpnII expression Adapter 
1\t\t\t\tACAGGTTCAGAGTTCTACAGTCCGAC\r\n+Illumina DpnII expression Adapter 2\t\t\t\tCAAGCAGAAGACGGCATACGA\r\n+Illumina DpnII expression PCR Primer 1\t\t\tCAAGCAGAAGACGGCATACGA\r\n+Illumina DpnII expression PCR Primer 2\t\t\tAATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA\r\n+Illumina DpnII expression Sequencing Primer\t\tCGACAGGTTCAGAGTTCTACAGTCCGACGATC\r\n+\r\n+Illumina NlaIII expression Adapter 1\t\t\tACAGGTTCAGAGTTCTACAGTCCGACATG\r\n+Illumina NlaIII expression Adapter 2\t\t\tCAAGCAGAAGACGGCATACGA\r\n+Illumina NlaIII expression PCR Primer 1\t\t\tCAAGCAGAAGACGGCATACGA\r\n+Illumina NlaIII expression PCR Primer 2\t\t\tAATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA\r\n+Illumina NlaIII expression Sequencing Primer\tCCGACAGGTTCAGAGTTCTACAGTCCGACATG\r\n+\r\n+Illumina Small RNA Adapter 1\t\t\t\t\tGTTCAGAGTTCTACAGTCCGACGATC\r\n+Illumina Small RNA Adapter 2\t\t\t\t\tTCGTATGCCGTCTTCTGCTTGT\r\n+Illumina Small RNA RT Primer\t\t\t\t\tCAAGCAGAAGACGGCATACGA\r\n+Illumina Small RNA PCR Primer 1\t\t\t\t\tCAAGCAGAAGACGGCATACGA\r\n+Illumina Small RNA PCR Primer 2\t\t\t\t\tAATGATACGGCGACCACCGACAGGTTCAGAGTTCTACAGTCCGA\r\n+Illumina Small RNA Sequencing Primer\t\t\tCGACAGGTTCAGAGTTCTACAGTCCGACGATC\r\n+\r\n+Illumina Multiplexing Adapter 1\t\t\t\t\tGATCGGAAGAGCACACGTCT\r\n+Illumina Multiplexing Adapter 2\t\t\t\t\tACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Multiplexing PCR Primer 1.01\t\t\tAATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Multiplexing PCR Primer 2.01\t\t\tGTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT\r\n+Illumina Multiplexing Read1 Sequencing Primer\tACACTCTTTCCCTACACGACGCTCTTCCGATCT\r\n+Illumina Multiplexing Index Sequencing Primer\tGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\r\n+Illumina Multiplexing Read2 Sequencing Primer\tGTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT\r\n+\r\n+Illumina PCR Primer Index 1\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCGTGATGTGACTGGAGTTC\r\n+Illumina PCR Primer Index 2\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATACATCGGTGACTGGAGTTC\r\n+Illumina PCR Primer Index 
3\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGCCTAAGTGACTGGAGTTC\r\n+Illumina PCR Primer Index 4\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTGGTCAGTGACTGGAGTTC\r\n+Illumina PCR Primer Index 5\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCACTGTGTGACTGGAGTTC\r\n+Illumina PCR Primer Index 6\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATATTGGCGTGACTGGAGTTC\r\n+Illumina PCR Primer "..b"Primer, Index 13\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTTGACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 14\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGGAACTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 15\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTGACATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 16\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGGACGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 17\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCTCTACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 18\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGCGGACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 19\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTTTCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 20\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGGCCACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 21\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCGAAACGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 22\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCGTACGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 23\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCCACTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 24\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGCTACCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 25\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATATCAGTGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 26\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGCTCATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 27\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATAGGAATGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 
28\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCTTTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 29\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTAGTTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 30\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCCGGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 31\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATATCGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 32\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTGAGTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 33\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCGCCTGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 34\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGCCATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 35\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATAAAATGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 36\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTGTTGGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 37\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATATTCCGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 38\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATAGCTAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 39\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGTATAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 40\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTCTGAGGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 41\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGTCGTCGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 42\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCGATTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 43\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGCTGTAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 44\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATATTATAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 45\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATGAATGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 46\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTCGGGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 
47\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATCTTCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+RNA PCR Primer, Index 48\t\t\t\t\t\tCAAGCAGAAGACGGCATACGAGATTGCCGAGTGACTGGAGTTCCTTGGCACCCGAGAATTCCA\r\n+\r\n+ABI Dynabead EcoP Oligo\t\t\t\t\t\t\tCTGATCTAGAGGTACCGGATCCCAGCAGT\r\n+ABI Solid3 Adapter A\t\t\t\t\t\t\tCTGCCCCGGGTTCCTCATTCTCTCAGCAGCATG\r\n+ABI Solid3 Adapter B\t\t\t\t\t\t\tCCACTACGCCTCCGCTTTCCTCTCTATGGGCAGTCGGTGAT\r\n+ABI Solid3 5' AMP Primer\t\t\t\t\t\tCCACTACGCCTCCGCTTTCCTCTCTATG\r\n+ABI Solid3 3' AMP Primer\t\t\t\t\t\tCTGCCCCGGGTTCCTCATTCT\r\n+ABI Solid3 EF1 alpha Sense Primer\t\t\t\tCATGTGTGTTGAGAGCTTC\r\n+ABI Solid3 EF1 alpha Antisense Primer\t\t\tGAAAACCAAAGTGGTCCAC\r\n+ABI Solid3 GAPDH Forward Primer\t\t\t\t\tTTAGCACCCCTGGCCAAGG\r\n+ABI Solid3 GAPDH Reverse Primer\t\t\t\t\tCTTACTCCTTGGAGGCCATG\r\n"
b
diff -r 000000000000 -r e28c965eeed4 test-data/fastqc_report.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fastqc_report.html Mon Jan 27 09:29:14 2014 -0500
[
b'@@ -0,0 +1,1809 @@\n+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Strict//EN">\n+<html>\n+<head><title>dataset_1.dat FastQC Report</title>\n+\n+<style type="text/css">\n+\n+ @media screen {\n+  div.summary {\n+    width: 18em;\n+    position:fixed;\n+    top: 3em;\n+    margin:1em 0 0 1em;\n+  }\n+  \n+  div.main {\n+    display:block;\n+    position:absolute;\n+    overflow:auto;\n+    height:auto;\n+    width:auto;\n+    top:4.5em;\n+    bottom:2.3em;\n+    left:18em;\n+    right:0;\n+    border-left: 1px solid #CCC;\n+    padding:0 0 0 1em;\n+    background-color: white;\n+    z-index:1;\n+  }\n+  \n+  div.header {\n+    background-color: #EEE;\n+    border:0;\n+    margin:0;\n+    padding: 0.5em;\n+    font-size: 200%;\n+    font-weight: bold;\n+    position:fixed;\n+    width:100%;\n+    top:0;\n+    left:0;\n+    z-index:2;\n+  }\n+\n+  div.footer {\n+    background-color: #EEE;\n+    border:0;\n+    margin:0;\n+\tpadding:0.5em;\n+    height: 1.3em;\n+\toverflow:hidden;\n+    font-size: 100%;\n+    font-weight: bold;\n+    position:fixed;\n+    bottom:0;\n+    width:100%;\n+    z-index:2;\n+  }\n+  \n+  img.indented {\n+    margin-left: 3em;\n+  }\n+ }\n+ \n+ @media print {\n+\timg {\n+\t\tmax-width:100% !important;\n+\t\tpage-break-inside: avoid;\n+\t}\n+\th2, h3 {\n+\t\tpage-break-after: avoid;\n+\t}\n+\tdiv.header {\n+      background-color: #FFF;\n+    }\n+\t\n+ }\n+ \n+ body {    \n+  font-family: sans-serif;   \n+  color: #000;   \n+  background-color: #FFF;\n+  border: 0;\n+  margin: 0;\n+  padding: 0;\n+  }\n+  \n+  div.header {\n+  border:0;\n+  margin:0;\n+  padding: 0.5em;\n+  font-size: 200%;\n+  font-weight: bold;\n+  width:100%;\n+  }    \n+  \n+  #header_title {\n+  display:inline-block;\n+  float:left;\n+  clear:left;\n+  }\n+  #header_filename {\n+  display:inline-block;\n+  float:right;\n+  clear:right;\n+  font-size: 50%;\n+  margin-right:2em;\n+  text-align: right;\n+  }\n+\n+  div.header h3 {\n+  font-size: 50%;\n+  margin-bottom: 0;\n+ 
 }\n+  \n+  div.summary ul {\n+  padding-left:0;\n+  list-style-type:none;\n+  }\n+  \n+  div.summary ul li img {\n+  margin-bottom:-0.5em;\n+  margin-top:0.5em;\n+  }\n+\t  \n+  div.main {\n+  background-color: white;\n+  }\n+      \n+  div.module {\n+  padding-bottom:1.5em;\n+  padding-top:1.5em;\n+  }\n+\t  \n+  div.footer {\n+  background-color: #EEE;\n+  border:0;\n+  margin:0;\n+  padding: 0.5em;\n+  font-size: 100%;\n+  font-weight: bold;\n+  width:100%;\n+  }\n+\n+\n+  a {\n+  color: #000080;\n+  }\n+\n+  a:hover {\n+  color: #800000;\n+  }\n+      \n+  h2 {\n+  color: #800000;\n+  padding-bottom: 0;\n+  margin-bottom: 0;\n+  clear:left;\n+  }\n+\n+  table { \n+  margin-left: 3em;\n+  text-align: center;\n+  }\n+  \n+  th { \n+  text-align: center;\n+  background-color: #000080;\n+  color: #FFF;\n+  padding: 0.4em;\n+  }      \n+  \n+  td { \n+  font-family: monospace; \n+  text-align: left;\n+  background-color: #EEE;\n+  color: #000;\n+  padding: 0.4em;\n+  }\n+\n+  img {\n+  padding-top: 0;\n+  margin-top: 0;\n+  border-top: 0;\n+  }\n+\n+  \n+  p {\n+  padding-top: 0;\n+  margin-top: 0;\n+  }\n+  \n+</style>\n+\n+</head>\n+<body>\n+<div class="header">\n+<div id="header_title"><img src="fastqc_icon.png" alt="FastQC">FastQC Report</div>\n+<div id="header_filename">\n+Wed 16 Nov 2011<br />\n+dataset_1.dat\n+</div>\n+</div>\n+<div class="summary">\n+<h2>Summary</h2>\n+<ul>\n+<li><img src="tick.png" alt="[PASS]"> <a href="#M0">Basic Statistics</a></li>\n+<li><img src="warning.png" alt="[WARNING]"> <a href="#M1">Per base sequence quality</a></li>\n+<li><img src="warning.png" alt="[WARNING]"> <a href="#M2">Per sequence quality scores</a></li>\n+<li><img src="warning.png" alt="[WARNING]"> <a href="#M3">Per base sequence content</a></li>\n+<li><img src="tick.png" alt="[PASS]"> <a href="#M4">Per base GC content</a></li>\n+<li><img src="warning.png" alt="[WARNING]"> <a href="#M5">Per sequence GC content</a></li>\n+<li><img src="tick.png" alt="[PASS]"> <a 
href="#M6">Per base N content</a></li>\n+<li><img src="tick.png" alt="[PASS]"> <a href="#M7">Sequence Length Distribution</a></li>\n+<li><img src="tick.png" alt="[PASS]"> <a href="#M8">Sequence Duplication Levels</a></li>\n+<li><img src="warn'..b'11</td>\n+</tr>\n+<tr>\n+<td>CTCAT</td>\n+<td>255</td>\n+<td>0.9801258</td>\n+<td>5.7624474</td>\n+<td>4</td>\n+</tr>\n+<tr>\n+<td>GAACC</td>\n+<td>210</td>\n+<td>0.9783144</td>\n+<td>5.820272</td>\n+<td>25</td>\n+</tr>\n+<tr>\n+<td>GGCGG</td>\n+<td>110</td>\n+<td>0.93654746</td>\n+<td>10.637051</td>\n+<td>44</td>\n+</tr>\n+<tr>\n+<td>GCGGG</td>\n+<td>110</td>\n+<td>0.93654746</td>\n+<td>6.3886194</td>\n+<td>36</td>\n+</tr>\n+<tr>\n+<td>ATCGG</td>\n+<td>190</td>\n+<td>0.93637353</td>\n+<td>6.163312</td>\n+<td>30</td>\n+</tr>\n+<tr>\n+<td>GTCTG</td>\n+<td>180</td>\n+<td>0.9363471</td>\n+<td>5.2096424</td>\n+<td>41</td>\n+</tr>\n+<tr>\n+<td>GCGGT</td>\n+<td>140</td>\n+<td>0.93170583</td>\n+<td>13.303162</td>\n+<td>8</td>\n+</tr>\n+<tr>\n+<td>CGGGG</td>\n+<td>105</td>\n+<td>0.8939771</td>\n+<td>6.3886194</td>\n+<td>46</td>\n+</tr>\n+<tr>\n+<td>GGGCG</td>\n+<td>105</td>\n+<td>0.8939771</td>\n+<td>8.518159</td>\n+<td>43</td>\n+</tr>\n+<tr>\n+<td>TCGGA</td>\n+<td>170</td>\n+<td>0.83780795</td>\n+<td>7.395975</td>\n+<td>31</td>\n+</tr>\n+<tr>\n+<td>CTTGT</td>\n+<td>205</td>\n+<td>0.83355045</td>\n+<td>5.0850673</td>\n+<td>38</td>\n+</tr>\n+<tr>\n+<td>TGCCG</td>\n+<td>125</td>\n+<td>0.83002883</td>\n+<td>14.93275</td>\n+<td>23</td>\n+</tr>\n+<tr>\n+<td>AGGCG</td>\n+<td>125</td>\n+<td>0.7881193</td>\n+<td>6.3016763</td>\n+<td>15</td>\n+</tr>\n+<tr>\n+<td>CGGTT</td>\n+<td>150</td>\n+<td>0.78028923</td>\n+<td>10.398446</td>\n+<td>9</td>\n+</tr>\n+<tr>\n+<td>GTGAC</td>\n+<td>150</td>\n+<td>0.7392424</td>\n+<td>6.163313</td>\n+<td>32</td>\n+</tr>\n+<tr>\n+<td>GCCGA</td>\n+<td>115</td>\n+<td>0.723456</td>\n+<td>12.575301</td>\n+<td>24</td>\n+</tr>\n+<tr>\n+<td>CCCGC</td>\n+<td>85</td>\n+<td>0.7188745</td>\n+<td>6.3460584</td>\n+<td>30</t
d>\n+</tr>\n+<tr>\n+<td>CCCCG</td>\n+<td>85</td>\n+<td>0.7188745</td>\n+<td>10.576764</td>\n+<td>29</td>\n+</tr>\n+<tr>\n+<td>CGAGA</td>\n+<td>135</td>\n+<td>0.6303192</td>\n+<td>9.333206</td>\n+<td>26</td>\n+</tr>\n+<tr>\n+<td>CGGGA</td>\n+<td>90</td>\n+<td>0.5674459</td>\n+<td>6.301676</td>\n+<td>24</td>\n+</tr>\n+<tr>\n+<td>CACGG</td>\n+<td>85</td>\n+<td>0.5347284</td>\n+<td>6.2876506</td>\n+<td>37</td>\n+</tr>\n+<tr>\n+<td>CGCCT</td>\n+<td>80</td>\n+<td>0.5300361</td>\n+<td>6.6286345</td>\n+<td>30</td>\n+</tr>\n+<tr>\n+<td>CCGCA</td>\n+<td>75</td>\n+<td>0.47076905</td>\n+<td>6.2799363</td>\n+<td>36</td>\n+</tr>\n+<tr>\n+<td>CGTTG</td>\n+<td>40</td>\n+<td>0.20807713</td>\n+<td>5.2044277</td>\n+<td>46</td>\n+</tr>\n+</table>\n+</div>\n+</body></html><div class="module"><h2>Files created by FastQC</h2><table cellspacing="2" cellpadding="2">\n+<tr><td><a href="dataset_1.dat_fastqc.zip">dataset_1.dat_fastqc.zip  (312.4 KB)</a></td></tr>\n+<tr><td><a href="duplication_levels.png">duplication_levels.png  (14.5 KB)</a></td></tr>\n+<tr><td><a href="fastqc_data.txt">fastqc_data.txt  (15.0 KB)</a></td></tr>\n+<tr><td><a href="fastqc_report.html">fastqc_report.html  (25.2 KB)</a></td></tr>\n+<tr><td><a href="kmer_profiles.png">kmer_profiles.png  (186.7 KB)</a></td></tr>\n+<tr><td><a href="per_base_gc_content.png">per_base_gc_content.png  (12.1 KB)</a></td></tr>\n+<tr><td><a href="per_base_n_content.png">per_base_n_content.png  (7.4 KB)</a></td></tr>\n+<tr><td><a href="per_base_quality.png">per_base_quality.png  (9.6 KB)</a></td></tr>\n+<tr><td><a href="per_base_sequence_content.png">per_base_sequence_content.png  (23.9 KB)</a></td></tr>\n+<tr><td><a href="per_sequence_gc_content.png">per_sequence_gc_content.png  (29.6 KB)</a></td></tr>\n+<tr><td><a href="per_sequence_quality.png">per_sequence_quality.png  (21.9 KB)</a></td></tr>\n+<tr><td><a href="sequence_length_distribution.png">sequence_length_distribution.png  (18.9 KB)</a></td></tr>\n+<tr><td><a 
href="summary.txt">summary.txt  (465 B)</a></td></tr>\n+</table>\n+<a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC documentation and full attribution is here</a><br/><hr/>\n+FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n+</div></div><div class="footer">Produced by <a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC</a> (version 0.10.0)</div>\n'
b
diff -r 000000000000 -r e28c965eeed4 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Jan 27 09:29:14 2014 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="FastQC" version="0.10.1">
+      <repository changeset_revision="4c650e5219e0" name="package_fastqc_0_10_1" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>