annotate je-markdupes.xml @ 2:f6bc74863c1c draft

planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 62411561ae3bc65ea8762d27ec79a7d912503e5b
author gbcs-embl-heidelberg
date Wed, 07 Dec 2016 11:57:09 -0500
parents d39a96961423
children 4ccf1406832d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
1 <tool id="je_markdupes" name="Je-MarkDuplicates" version="1.0">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
2 <description>to filter BAM files for read duplicates taking UMIs into account</description>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
3 <macros>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
4 <import>macros.xml</import>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
5 </macros>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
6 <stdio>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
7 <exit_code range="1:" level="fatal" description="Tool exception" />
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
8 </stdio>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
9 <version_command>echo '1.0'</version_command>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
10 <command interpreter="bash">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
11 <![CDATA[
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
12 je markdupes
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
13
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
14 ## picard MarkDuplicates defaults
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
15 INPUT="${inputFile}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
16 OUTPUT="${outFile}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
17
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
18 METRICS_FILE="${metrics_file}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
19
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
20 REMOVE_DUPLICATES="${remove_duplicates}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
21 ASSUME_SORTED="${assume_sorted}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
22
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
23 #for $element in $adv_options.comments:
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
24 COMMENT="${element.comment}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
25 #end for
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
26
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
27 DUPLICATE_SCORING_STRATEGY="${adv_options.duplicate_scoring_strategy}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
28
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
29 #import pipes
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
30 READ_NAME_REGEX=${ pipes.quote( str( $adv_options.read_name_regex ) ) or "''" }
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
31 OPTICAL_DUPLICATE_PIXEL_DISTANCE="${adv_options.optical_duplicate_pixel_distance}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
32
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
33 VALIDATION_STRINGENCY="${adv_options.validation_stringency}"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
34 QUIET=true
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
35 VERBOSITY=ERROR
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
36
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
37 ## Je Markdupes Specific
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
38 MM=${MM}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
39 #if str($MAX_N) != "":
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
40 MAX_N=${MAX_N}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
41 #end if
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
42 @barcode_option_cmd@
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
43
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
44 #for $i, $option in enumerate( $repeat_slots )
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
45 #if str($option.SLOTS) != "":
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
46 SLOTS=${option.SLOTS}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
47 #end if
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
48 #end for
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
49
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
50 #if str($trim_conditional.T) == "true":
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
51 T=${trim_conditional.T}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
52 #for $i, $option in enumerate( $trim_conditional.repeat_tslots )
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
53 #if str($option.TSLOTS) != "":
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
54 TSLOTS=${option.TSLOTS}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
55 #end if
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
56 #end for
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
57 #end if
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
58 ]]>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
59 </command>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
60 <configfiles>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
61 <expand macro="barcode_config_file"></expand>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
62 </configfiles>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
63
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
64 <inputs>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
65 <param format="bam,sam" name="inputFile" type="data" label="Select SAM/BAM dataset"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
66 help="If empty, upload or import a SAM/BAM dataset"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
67 <param name="remove_duplicates" type="boolean" label="If true do not write duplicates to the output file
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
68 instead of writing them with appropriate flags set" help="REMOVE_DUPLICATES; default=False"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
69 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
70 truevalue="true" falsevalue="false" help="ASSUME_SORTED; default=True"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
71 <conditional name="barcodes">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
72 <param name="barcode_list_type_con" type="select" label="Do you have a predefined list of UMIs">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
73 <option value="file" selected="true">A one column txt file from the history</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
74 <option value="text">Paste the UMI list in a text field</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
75 <option value="no_barcodes">No predefined list</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
76 </param>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
77
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
78 <when value="file">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
79 <param name="BARCODE_FILE" type="data" format="tabular,txt" label="UMI file"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
80 help="BARCODE_FILE. Pre-defined list of Unique Molecular Identifiers that can be expected.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
81 Format: one column text file, one UMI per line. All UMIs MUST have the same length."/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
82 </when>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
83
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
84 <when value="text">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
85 <param name="barcode_text" type="text" area="True" size="10x30"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
86 value="barcode\n" label="Barcode file"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
87 help="BARCODE_FILE. Pre-defined list of Unique Molecular Identifiers that can be expected.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
88 Format: one column text file, one UMI per line. All UMIs MUST have the same length.">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
89 <sanitizer>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
90 <valid initial="string.printable"></valid>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
91 <mapping initial="none"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
92 </sanitizer>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
93 </param>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
94 </when>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
95 <when value="no_barcodes"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
96 </conditional>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
97 <repeat name="repeat_slots" min="1" title="Unique Molecular Identifier location">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
98 <param name="SLOTS" type="text" value="-1" label="Where to find the UMIs in the read name"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
99 help="SLOTS. The last position is considered by default (-1). See help below."/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
100 </repeat>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
101 <param name="MM" type="integer" value="1" min="0"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
102 label="Number of maximum mismatches to consider two Unique Molecular Identifiers (UMIs) similar"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
103 help="MISMATCHES"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
104 <param name="MAX_N" type="text" value="" label="Maximum number of Ns a UMI can contain"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
105 help="MAX_NUMBER_OF_N. Above this value, reads are placed in a 'undefined' group.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
106 Default value is the MISMATCHES number."/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
107 <param name="SPLIT" type="text" value=":" label="Character to split up the header" help="SPLIT"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
108 <conditional name="trim_conditional">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
109 <param name="T" type="select"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
110 label="Should barcode information be removed from read names in the output BAM" help="TRIM_HEADERS">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
111 <option value="true">Yes</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
112 <option value="false" selected="true">No</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
113 </param>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
114 <when value="true">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
115 <repeat name="repeat_tslots" min="1" title="Unique Molecular Identifier location for trimming">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
116 <param name="TSLOTS" type="text" value="-1"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
117 label="Where to find the UMIs in the read name that should be removed from the header"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
118 help="TSLOTS. Value for SLOTS is considered by default. See help below"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
119 </repeat>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
120 </when>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
121 <when value="false"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
122 </conditional>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
123 <section name="adv_options" title="Advanced Options" expanded="False">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
124 <repeat name="comments" title="Comment" min="0" help="You can provide multiple comments">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
125 <param name="comment" type="text" label="Add this comment to BAM dataset"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
126 </repeat>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
127
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
128 <param name="duplicate_scoring_strategy" type="select" label="The scoring strategy for choosing the
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
129 non-duplicate among candidates" help="DUPLICATE_SCORING_STRATEGY; default=SUM_OF_BASE_QUALITIES">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
130 <option value="SUM_OF_BASE_QUALITIES">SUM_OF_BASE_QUALITIES</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
131 <option value="TOTAL_MAPPED_REFERENCE_LENGTH">TOTAL_MAPPED_REFERENCE_LENGTH</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
132 </param>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
133
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
134 <param name="read_name_regex" type="text" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*."
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
135 label="Regular expression that can be used to parse read names in the incoming SAM/BAM dataset"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
136 help="READ_NAME_REGEX; Read names are parsed to extract three variables: tile/region, x coordinate and
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
137 y coordinate. These values are used to estimate the rate of optical duplication in order to give a more
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
138 accurate estimated library size. See help below for more info;
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
139 default=[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
140 <sanitizer>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
141 <valid initial="string.printable">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
142 </valid>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
143 </sanitizer>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
144 </param>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
145 <param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
146 label="The maximum offset between two duplicate clusters in order to consider them optical duplicates"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
147 help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
148
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
149 <param name="validation_stringency" type="select" label="Select validation stringency"
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
150 help="Setting stringency to SILENT can improve performance when processing a BAM file in which
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
151 variable-length data (read, qualities, tags) do not otherwise need to be decoded.">
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
152 <option value="LENIENT" selected="True">Lenient</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
153 <option value="SILENT">Silent</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
154 <option value="STRICT">Strict</option>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
155 </param>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
156 </section>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
157 </inputs>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
158 <outputs>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
159 <data format="bam" name="outFile" label="${tool.name} on ${on_string}: Je-MarkDuplicates BAM output"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
160 <data format="txt" name="metrics_file" label="${tool.name} on ${on_string}: Je-MarkDuplicate metrics"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
161 </outputs>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
162
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
163 <tests>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
164 <test>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
165 <!-- picard markduplicates default test -->
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
166 <param name="inputFile" value="markdupes_DNase_sorted.bam" ftype="bam"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
167 <param name="barcode_list_type_con" value="file"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
168 <param name="BARCODE_FILE" value="markdupes_umis.txt" ftype="txt"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
169 <param name="repeat_slots_0|SLOTS" value="-1"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
170 <param name="repeat_slots_1|SLOTS" value="-2"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
171 <param name="MM" value="2"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
172 <param name="MAX_N" value="1"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
173 <param name="comment" value="test-run"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
174 <param name="assume_sorted" value="True"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
175 <param name="remove_duplicates" value="True"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
176 <param name="read_name_regex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*."/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
177 <param name="optical_duplicate_pixel_distance" value="100"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
178 <param name="duplicate_scoring_strategy" value="SUM_OF_BASE_QUALITIES"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
179 <param name="validation_stringency" value="LENIENT"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
180 <output name="outFile" file="markdupes_DNase_sorted_marked.bam" ftype="bam" lines_diff="2"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
181 <output name="metrics_file" file="markdupes_metrics.txt" ftype="txt" lines_diff="4"/>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
182 </test>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
183 </tests>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
184
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
185
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
186 <help>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
187 <![CDATA[
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
188 **What it does**
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
189
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
190 Je MarkDupes: Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules taking into account
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
191 molecular barcodes (Unique Molecular Identifiers or UMIs) found in read header.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
192 All records are then written to the output file with the duplicate records flagged, or the duplicates are discarded entirely (see REMOVE_DUPLICATES).
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
193
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
194 Input file is a SAM or BAM file.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
195
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
196 Author: Charles Girardot (charles.girardot@embl.de).
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
197
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
198 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de).
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
199
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
200 ------
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
201
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
202 **Know what you are doing**
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
203
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
204 .. class:: warningmark
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
205
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
206 You will want to read the `documentation`__.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
207
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
208 .. __: http://gbcs.embl.de/portal/Je
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
209
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
210 ------
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
211
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
212 **Parameter list**
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
213
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
214 This is an exhaustive list of options::
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
215
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
216 INPUT=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
217 I=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
218
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
219 One or more input SAM or BAM files to analyze. Must be coordinate sorted.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
220
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
221 Default value: null. This option may be specified 0 or more times.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
222
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
223 OUTPUT=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
224 O=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
225
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
226 The output file to write marked records to
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
227
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
228 Required.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
229
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
230 MISMATCHES=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
231 MM=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
232
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
233 Number of MisMatches (inclusive) to still consider two Unique Molecular Identifiers
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
234 (UMIs) the same i.e. this option buffers for sequencing errors.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
235 Indeed, in case of a sequencing error, 2 duplicate reads would not be considered
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
236 duplicates anymore.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
237 Note that Ns are not considered mismatches during comparison, i.e. ATTNGG and NTTANG are seen
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
238 as the same barcode and these two reads would be flagged duplicates.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
239 This option takes a single value even when several barcodes are present (see SLOTS).
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
240 Note that when declaring several barcodes (see SLOTS) AND providing a predefined set
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
241 of barcodes (see BC option), the MM value is applicable in each lookup. When a predefined
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
242 set of barcodes is NOT given, the different barcodes (SLOTS) are concatenated first and
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
243 the MM value is therefore considered *overall* as the concatenated code is seen as a
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
244 unique code.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
245 MM=null is like MM=0
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
246 Use the minimum Hamming distance of the original barcode set (if applicable).
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
247
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
248 Required.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
249
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
250 MAX_NUMBER_OF_N=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
251 MAX_N=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
252
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
253 Maximum number of Ns a molecular code can contain (inclusive). Above this value, reads
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
254 are placed in an UNDEF group.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
255 More precisely, these 'too degenerate' codes will not:
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
256 * be compared to the list of predefined codes [predefined code list situation ie BC
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
257 option given] nor
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
258 * be considered as a potential independent code [no predefined code list situation ie
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
259 BC option not given]
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
260 Default value is the MISMATCHES number.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
261 Note that when declaring several barcodes (see SLOTS) AND providing a predefined set
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
262 of barcodes (see BC option), the MAX_N value is applicable to each barcode. When a
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
263 predefined set
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
264 of barcodes is NOT given, the different barcodes (SLOTS) are concatenated first and the
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
265 MAX_N value
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
266 is therefore considered *overall*.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
267
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
268 Default value: null.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
269
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
270
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
271 SLOTS=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
272 SLOTS=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
273
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
274 Where to find the UMIs (and only the UMIs) in the read name once read name has been
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
275 tokenized using the SPLIT character (e.g. ':').
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
276 By default, the UMI is considered to be found at the end of the read header i.e. after
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
277 the last ':'. Use this option to indicate other or additional UMI positions (e.g.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
278 multiple UMIs present in read header).
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
279 IMPORTANT: counting starts at 1 and negative numbers can be used to start counting from
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
280 the end.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
281 For example, consider the following read name that lists 3 different barcodes at the end:
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
282 HISEQ:44:C6KC0ANXX:8:2112:20670:79594:CGATGTTT:GATCCTAG:AAGGTACG
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
283 to indicate that the three barcodes are molecular codes, use
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
284 SLOTS=-1 SLOTS=-2 SLOTS=-3
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
285 if only the 2 last ones should be considered (the third one being a sample encoding
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
286 barcode), use
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
287 SLOTS=-1 SLOTS=-2
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
288
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
289 Default value: null. This option may be specified 0 or more times.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
290
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
291 BARCODE_FILE=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
292 BC=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
293
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
294 Pre-defined list of UMIs that can be expected. Format: one column text file, one barcode
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
295 per line. All UMIs MUST have the same length.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
296
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
297 Default value: null.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
298
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
299 TRIM_HEADERS=Boolean
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
300 T=Boolean
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
301
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
302 Should barcode information be removed from read names in the output BAM?
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
303
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
304 Default value: false. This option can be set to 'null' to clear the default value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
305 Possible values: {true, false}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
306
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
307 TSLOTS=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
308 TSLOTS=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
309
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
310 Where to find *all* barcode(s) (i.e. sample encoding and UMIs) in the read name once it has
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
311 been tokenized using the SPLIT character (e.g. ':').
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
312 This option is only considered when TRIM_HEADERS=true. When TSLOTS is omitted while
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
313 TRIM_HEADERS=true, the values of SLOTS apply.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
314 IMPORTANT : counting starts at 1 and negative numbers can be used to start counting from
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
315 the end.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
316 See SLOTS help for examples.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
317
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
318 Default value: null. This option may be specified 0 or more times.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
319
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
320 SPLIT_CHAR=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
321 SPLIT=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
322
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
323 Character to use to split up the read header line, default is ':'.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
324
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
325 Default value: ':'. This option can be set to 'null' to clear the default value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
326
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
327 INPUT=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
328 I=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
329
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
330 One or more input SAM or BAM files to analyze. Must be coordinate sorted.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
331
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
332 Default value: null. This option may be specified 0 or more times.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
333
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
334 OUTPUT=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
335 O=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
336
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
337 The output file to write marked records to. Required.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
338
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
339 METRICS_FILE=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
340 M=File
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
341
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
342 File to write duplication metrics to. Required.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
343
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
344 COMMENT=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
345 CO=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
346
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
347 Comment(s) to include in the output file's header.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
348
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
349 Default value: null. This option may be specified 0 or more times.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
350
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
351 REMOVE_DUPLICATES=Boolean
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
352
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
353 If true, do not write duplicates to the output file instead of writing them with
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
354 appropriate flags set.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
355
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
356 Default value: false. This option can be set to 'null' to clear
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
357 the default value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
358 Possible values: {true, false}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
359
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
360 ASSUME_SORTED=Boolean
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
361 AS=Boolean
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
362
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
363 If true, assume that the input file is coordinate sorted even if the header says
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
364 otherwise.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
365
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
366 Default value: false. This option can be set to 'null' to clear the default
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
367 value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
368 Possible values: {true, false}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
369
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
370 DUPLICATE_SCORING_STRATEGY=ScoringStrategy
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
371 DS=ScoringStrategy
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
372
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
373 The scoring strategy for choosing the non-duplicate among candidates.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
374
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
375 Default value: SUM_OF_BASE_QUALITIES. This option can be set to 'null' to clear the default value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
376 Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH}
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
377
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
378 READ_NAME_REGEX=String
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
379
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
380 Regular expression that can be used to parse read names in the incoming SAM file. Read
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
381 names are parsed to extract three variables: tile/region, x coordinate and y coordinate.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
382 These values are used to estimate the rate of optical duplication in order to give a more
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
383 accurate estimated library size. Set this option to null to disable optical duplicate
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
384 detection. The regular expression should contain three capture groups for the three
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
385 variables, in order. It must match the entire read name. Note that if the default regex
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
386 is specified, a regex match is not actually done, but instead the read name is split on
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
387 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
388 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
389 are assumed to be tile, x and y values.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
390
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
391 Default value:
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
392 [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. This option can be set to 'null' to
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
393 clear the default value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
394
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
395 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
396
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
397 The maximum offset between two duplicate clusters in order to consider them optical
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
398 duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels)
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
399 unless using later versions of the Illumina pipeline that multiply pixel values by 10, in
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
400 which case 50-100 is more normal.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
401
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
402 Default value: 100. This option can be set to 'null'
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
403 to clear the default value.
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
404
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
405 ]]>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
406 </help>
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
407
d39a96961423 Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
408 </tool>