annotate fastqvalidator.xml @ 21:912476854271 draft

Uploaded
author nilesh
date Wed, 26 Jun 2013 20:53:09 -0400
parents 96c3c6ca8b5f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
1 <tool id="fastq_validator_wrapper" name="FastQ Validator">
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
2 <description>for each sequence in a file</description>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
3 <requirements>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
4 <requirement type="set_environment">FQVAL_SCRIPT_PATH</requirement>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
5 <requirement type="package" version="1.0.0">fastq_validator</requirement>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
6 </requirements>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
7 <command> \$FQVAL_SCRIPT_PATH/fastQValidator --file $input --minReadLen $minReadLen --maxErrors $maxErrors --printableErrors $printableErrors $baseComposition $disableSeqIDCheck $quiet $avgQual $spacetype $params > $output</command>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
8 <inputs>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
9 <param name="input" type="data" format="fastq,txt" label="FASTQ file"/>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
10 <param name="minReadLen" value="10" type="integer" min="1" label="Minimum allowed read length (Default=10)"/>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
11 <param name="maxErrors" type="integer" value="-1" min="-1" label="Number of errors to allow (Default=-1)" />
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
12 <param name="printableErrors" type="integer" value="20" min="0" label="Max errors to print before suppressing (Default=20)" /> <!-- Required on purpose: the command template always passes this value to the printableErrors flag, so a blank would leave the flag without an argument. -->
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
13 <param name="baseComposition" type="boolean" optional="true" label="Print Base Composition Statistics" truevalue="--baseComposition" falsevalue=""/> <!-- Unchecked must render as nothing; without an explicit falsevalue Galaxy substitutes the string "false" into the command. -->
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
14 <param name="avgQual" type="boolean" optional="true" label="Print Avg Phred Quality/Cycle and Overall Avg Quality" truevalue="--avgQual" falsevalue=""/> <!-- Unchecked must render as nothing; without an explicit falsevalue Galaxy substitutes the string "false" into the command. -->
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
15 <param name="disableSeqIDCheck" type="boolean" optional="true" label="Disable unique sequence identifier check (check to save memory)" truevalue="--disableSeqIDCheck" falsevalue=""/> <!-- Unchecked must render as nothing; without an explicit falsevalue Galaxy substitutes the string "false" into the command. -->
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
16 <param name="quiet" type="boolean" optional="false" label="Suppress error/summary statistics display" truevalue="--quiet" falsevalue="" /> <!-- Unchecked must render as nothing; without an explicit falsevalue Galaxy substitutes the string "false" into the command. -->
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
17 <param name="params" type="boolean" optional="false" label="Print parameter settings" truevalue="--params" falsevalue=""/> <!-- Unchecked must render as nothing; without an explicit falsevalue Galaxy substitutes the string "false" into the command. -->
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
18 <param name="spacetype" type="select" label="Space Options for Raw Sequence (Default=Auto)" display="radio">
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
19 <option selected="true" value="--auto">Auto</option>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
20 <option value="--baseSpace">BaseSpace</option>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
21 <option value="--colorSpace">ColorSpace</option>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
22 </param>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
23 </inputs>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
24 <outputs>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
25 <data format="txt" name="output" />
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
26 </outputs>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
27 <help>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
28
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
29 About
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
30 +++++
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
31
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
32 The fastQValidator validates the format of fastq files.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
33 The initial version of a FASTQ Validator is complete. It was built using LibStatGen: FASTQ which is part of the libStatGen library.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
34
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
35
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
36 Info on Errors
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
37 ++++++++++++++
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
38
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
39 Number of Errors to allow (default=-1):
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
40 Number of errors to allow before quitting reading/validating the file. -1 (default) indicates to not quit until the entire file is read. 0 indicates not to read/validate anything.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
41
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
42 Max errors to print before suppressing (default=20):
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
43 Maximum number of errors to print before suppressing them (Defaults to 20). Different than maxErrors since printableErrors will continue reading and validating the file until the end, but just doesn't print the errors.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
44
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
45 **Info on Space Options for Raw Sequence**
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
46 auto: Determine baseSpace/colorSpace from the Raw Sequence in the file (Default)
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
47 baseSpace: ACTGN only
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
48 colorSpace: 0123. only (with 1 character primer base)
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
49
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
50
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
51 Output
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
52 ++++++
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
53
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
54 When running the fastQValidator Executable, if the --params option is specified, the output starts with a summary of the parameters::
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
55
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
56 =============================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
57 The following parameters are available. Ones with "[]" are in effect::
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
58
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
59 Input Parameters
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
60 --file [../fastqValidator/test/testFile.txt], --baseComposition,
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
61 --disableSeqIDCheck, --quiet, --params [ON], --minReadLen [10],
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
62 --maxErrors [-1]
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
63 Space Type : --baseSpace, --colorSpace, --auto [ON]
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
64 Errors : --ignoreErrors, --printableErrors [20]
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
65 =============================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
66
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
67 The Validator Executable outputs error messages for invalid sequences based on Validation Criteria. For Example: ::
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
68
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
69 ======================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
70 ERROR on Line 25: The sequence identifier line was too short.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
71 ERROR on Line 29: First line of a sequence does not begin wtih @
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
72 ERROR on Line 33: No Sequence Identifier specified before the comment.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
73 ======================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
74
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
75 Base Composition Percentages by Index are printed if --baseComposition is set to ON. For Example: ::
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
76
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
77 ========================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
78 Base Composition Statistics:
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
79 Read Index %A %C %G %T %N Total Reads At Index
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
80 0 100.00 0.00 0.00 0.00 0.00 20
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
81 1 5.00 95.00 0.00 0.00 0.00 20
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
82 2 5.00 0.00 5.00 90.00 0.00 20
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
83 ========================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
84
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
85 Phred Quality by Index are printed if --avgQual is set to ON in a version after May 29, 2012. Only valid qualities are included in these averages. For Example::
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
86
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
87 ==================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
88 Average Phred Quality by Read Index (starts at 0):
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
89 Read Index Average Quality
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
90 0 44.10
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
91 1 45.55
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
92 2 51.11
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
93 3 47.68
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
94 4 47.37
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
95
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
96 Overall Average Phred Quality = 50.40
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
97 ==================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
98
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
99 Summary of the number of lines, sequences, and errors: ::
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
100
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
101 =======================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
102 Finished processing testFile.txt with 92 lines containing 20 sequences.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
103 There were a total of 17 errors.
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
104 =======================================================================
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
105
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
106 </help>
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
107
96c3c6ca8b5f Uploaded
nilesh
parents:
diff changeset
108 </tool>