annotate rgFastQC.xml @ 1:8fae48caaf06 draft

Uploaded form GH
author devteam
date Tue, 11 Nov 2014 12:46:27 -0500
parents e28c965eeed4
children d2cf2c0c8a11
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
1 <tool name="FastQC:Read QC" id="fastqc" version="0.62">
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
2 <description>reports using FastQC</description>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
3 <command interpreter="python">
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
4 rgFastQC.py -i "$input_file" -d "$html_file.files_path" -o "$html_file" -t "$text_file" -n "$out_prefix" -f "$input_file.ext" -j "$input_file.name" -e "\$FASTQC_JAR_PATH/fastqc"
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
5 #if $contaminants.dataset and str($contaminants) > ''
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
6 -c "$contaminants"
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
7 #end if
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
8 #if $limits.dataset and str($limits) > ''
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
9 -l "$limits"
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
10 #end if
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
11 </command>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
12 <requirements>
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
13 <requirement type="package" version="0.11.2">FastQC</requirement>
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
14 </requirements>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
15 <inputs>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
16 <param format="fastqsanger,fastq,bam,sam" name="input_file" type="data" label="Short read data from your current history" />
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
17 <param name="out_prefix" value="FastQC" type="text" label="Title for the output file - to remind you what the job was for" size="80"
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
18 help="Letters and numbers only please - other characters will be removed">
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
19 <sanitizer invalid_char="">
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
20 <valid initial="string.letters,string.digits"/>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
21 </sanitizer>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
22 </param>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
23 <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
24 help="tab delimited file with 2 columns: name and sequence. For example: Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA"/>
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
25 <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file"
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
26 help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for each submodule's warning parameter" />
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
27 </inputs>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
28 <outputs>
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
29 <data format="html" name="html_file" label="${out_prefix}_${input_file.name}_Webpage.html" />
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
30 <data format="txt" name="text_file" label="${out_prefix}_${input_file.name}_RawData.txt" />
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
31 </outputs>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
32 <tests>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
33 <test>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
34 <param name="input_file" value="1000gsample.fastq" />
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
35 <param name="out_prefix" value="fastqc_out" />
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
36 <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
37 <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
38 <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="100"/>
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
39 </test>
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
40 <test>
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
41 <param name="input_file" value="1000gsample.fastq" />
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
42 <param name="out_prefix" value="fastqc_out" />
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
43 <param name="limits" value="fastqc_customlimits.txt" ftype="txt" />
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
44 <output name="html_file" file="fastqc_report2.html" ftype="html" lines_diff="100"/>
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
45 <output name="text_file" file="fastqc_data2.txt" ftype="txt" lines_diff="100"/>
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
46 </test>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
47 </tests>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
48 <help>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
49
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
50 .. class:: infomark
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
51
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
52 **Purpose**
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
53
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
54 FastQC aims to provide a simple way to do some quality control checks on raw
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
55 sequence data coming from high throughput sequencing pipelines.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
56 It provides a modular set of analyses which you can use to give a quick
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
57 impression of whether your data has any problems of
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
58 which you should be aware before doing any further analysis.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
59
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
60 The main functions of FastQC are:
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
61
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
62 - Import of data from BAM, SAM or FastQ files (any variant)
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
63 - Providing a quick overview to tell you in which areas there may be problems
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
64 - Summary graphs and tables to quickly assess your data
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
65 - Export of results to an HTML based permanent report
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
66 - Offline operation to allow automated generation of reports without running the interactive application
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
67
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
68
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
69 -----
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
70
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
71
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
72 .. class:: infomark
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
73
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
74 **FastQC**
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
75
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
76 This is a Galaxy wrapper. It merely exposes the external package FastQC_, which is documented at FastQC_.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
77 Kindly acknowledge it as well as this tool if you use it.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
78 FastQC incorporates the Picard-tools_ libraries for sam/bam processing.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
79
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
80 The contaminants file parameter was borrowed from the independently developed
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
81 fastqcwrapper contributed to the Galaxy Community Tool Shed by J. Johnson.
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
82 Adaptation to version 0.11.2 by T. McGowan.
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
83
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
84 -----
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
85
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
86 .. class:: infomark
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
87
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
88 **Inputs and outputs**
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
89
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
90 FastQC_ is the best place to look for documentation - it's very good.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
91 A summary follows below for those in a tearing hurry.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
92
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
93 This wrapper will accept a Galaxy fastq, sam or bam as the input read file to check.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
94 It will also take an optional file containing a list of contaminants information, in the form of
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
95 a tab-delimited file with 2 columns, name and sequence. As another option the tool takes a custom
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
96 limits.txt file that allows setting the warning thresholds for the different modules and also specifies
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
97 which modules to include in the output.
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
98
1
8fae48caaf06 Uploaded form GH
devteam
parents: 0
diff changeset
99 The tool produces a basic text file and an HTML output file that contain all of the results, including the following:
0
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
100
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
101 - Basic Statistics
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
102 - Per base sequence quality
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
103 - Per sequence quality scores
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
104 - Per base sequence content
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
105 - Per base GC content
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
106 - Per sequence GC content
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
107 - Per base N content
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
108 - Sequence Length Distribution
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
109 - Sequence Duplication Levels
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
110 - Overrepresented sequences
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
111 - Kmer Content
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
112
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
113 All except Basic Statistics and Overrepresented sequences are plots.
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
114 .. _FastQC: http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
115 .. _Picard-tools: http://picard.sourceforge.net/index.shtml
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
116
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
117 </help>
e28c965eeed4 Imported from capsule None
devteam
parents:
diff changeset
118 </tool>