0
|
1 <tool name="ValidateSamFile" id="picard_ValidateSamFile" version="1.122.0">
|
|
2 <description>assess validity of SAM/BAM dataset</description>
|
|
3 <requirements>
|
|
4 <requirement type="package" version="1.122.0">picard</requirement>
|
|
5 </requirements>
|
|
6
|
|
7 <macros>
|
|
8 <import>picard_macros.xml</import>
|
|
9 </macros>
|
|
10
|
|
11 <command>
|
|
12 ## Command template: raise the open-file limit, stage the reference, then run Picard ValidateSamFile
|
|
13 ##set the maximum number of open files to the hard maximum, or 4096 if on a mac (mac gives 'unlimited' as output of the `ulimit -Hn` command)
|
|
14
|
|
15 [ `ulimit -Hn` = unlimited ] && ulimit -Sn 4096 || ulimit -Sn `ulimit -Hn`
|
|
16
|
|
17 &&
|
|
18
|
|
19 ##set up input files
|
|
20 ## "localref.fa" is the symlink name used for a history reference; a cached reference overrides it with the data-table path
|
|
21 #set $reference_fasta_filename = "localref.fa"
|
|
22
|
|
23 #if str( $reference_source.reference_source_selector ) == "history":
|
|
24 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &&
|
|
25 #else:
|
|
26 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
|
|
27 #end if
|
|
28
|
|
29 @java_options@
|
|
30
|
|
31 java -jar \$JAVA_JAR_PATH/ValidateSamFile.jar
|
|
32
|
|
33 INPUT="${inputFile}"
|
|
34 OUTPUT="${outFile}"
|
|
35 MODE="${mode}"
|
|
36 ## one IGNORE= argument is emitted per selected error type; the loop is skipped when the selection renders as "None"
|
|
37 #if str( $ignore ) != "None":
|
|
38 #for $element in str( $ignore ).split(','): ## See trello card https://trello.com/c/9nW02Zhd
|
|
39 IGNORE="${element}"
|
|
40 #end for
|
|
41 #end if
|
|
42
|
|
43 MAX_OUTPUT="${max_output}"
|
|
44 REFERENCE_SEQUENCE="${reference_fasta_filename}"
|
|
45 IGNORE_WARNINGS="${ignore_warnings}"
|
|
46 IS_BISULFITE_SEQUENCED="${is_bisulfite_sequenced}"
|
|
47 MAX_OPEN_TEMP_FILES=`ulimit -Sn`
|
|
48 VALIDATE_INDEX="${validate_index}" ## forward the validate_index input, which was declared in the inputs section but previously never passed to Picard
|
|
49 VERBOSITY=ERROR
|
|
50 QUIET=true
|
|
51
|
|
52 </command>
|
|
53 <inputs>
|
|
54 <param format="sam,bam" name="inputFile" type="data" label="SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset." />
|
|
55 <conditional name="reference_source">
|
|
56 <param name="reference_source_selector" type="select" label="Load reference genome from">
|
|
57 <option value="cached">Local cache</option>
|
|
58 <option value="history">History</option>
|
|
59 </param>
|
|
60 <when value="cached">
|
|
61 <param name="ref_file" type="select" label="Use dictionary from the list" help="Select genome from the list">
|
|
62 <options from_data_table="picard_indexes">
|
|
63 <filter type="sort_by" column="2" />
|
|
64 <validator type="no_options" message="No indexes are available" />
|
|
65 </options>
|
|
66 <validator type="no_options" message="A built-in dictionary is not available for the build associated with the selected input file"/>
|
|
67 </param>
|
|
68 </when>
|
|
69 <when value="history">
|
|
70 <param name="ref_file" type="data" format="fasta" label="Use the following dataset to create dictionary" help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command" />
|
|
71 </when>
|
|
72 </conditional>
|
|
73
|
|
74 <param name="mode" type="select" label="Select output mode" help="MODE">
|
|
75 <option value="VERBOSE">Verbose</option>
|
|
76 <option value="SUMMARY">Summary</option>
|
|
77 </param>
|
|
78 <param name="ignore" type="select" multiple="true" display="checkboxes" label="Select validation error types to ignore" help="IGNORE">
|
|
79 <option value="INVALID_QUALITY_FORMAT">INVALID_QUALITY_FORMAT</option>
|
|
80 <option value="INVALID_FLAG_PROPER_PAIR">INVALID_FLAG_PROPER_PAIR</option>
|
|
81 <option value="INVALID_FLAG_MATE_UNMAPPED"/>
|
|
82 <option value="MISMATCH_FLAG_MATE_UNMAPPED"/>
|
|
83 <option value="INVALID_FLAG_MATE_NEG_STRAND"/>
|
|
84 <option value="MISMATCH_FLAG_MATE_NEG_STRAND"/>
|
|
85 <option value="INVALID_FLAG_FIRST_OF_PAIR"/>
|
|
86 <option value="INVALID_FLAG_SECOND_OF_PAIR"/>
|
|
87 <option value="PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND"/>
|
|
88 <option value="INVALID_FLAG_NOT_PRIM_ALIGNMENT"/>
|
|
89 <option value="INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT"/>
|
|
90 <option value="INVALID_FLAG_READ_UNMAPPED"/>
|
|
91 <option value="INVALID_INSERT_SIZE"/>
|
|
92 <option value="INVALID_MAPPING_QUALITY"/>
|
|
93 <option value="INVALID_CIGAR"/>
|
|
94 <option value="ADJACENT_INDEL_IN_CIGAR"/>
|
|
95 <option value="INVALID_MATE_REF_INDEX"/>
|
|
96 <option value="MISMATCH_MATE_REF_INDEX"/>
|
|
97 <option value="INVALID_REFERENCE_INDEX"/>
|
|
98 <option value="INVALID_ALIGNMENT_START"/>
|
|
99 <option value="MISMATCH_MATE_ALIGNMENT_START"/>
|
|
100 <option value="MATE_FIELD_MISMATCH"/>
|
|
101 <option value="INVALID_TAG_NM"/>
|
|
102 <option value="MISSING_TAG_NM"/>
|
|
103 <option value="MISSING_HEADER"/>
|
|
104 <option value="MISSING_SEQUENCE_DICTIONARY"/>
|
|
105 <option value="MISSING_READ_GROUP"/>
|
|
106 <option value="RECORD_OUT_OF_ORDER"/>
|
|
107 <option value="READ_GROUP_NOT_FOUND"/>
|
|
108 <option value="RECORD_MISSING_READ_GROUP"/>
|
|
109 <option value="INVALID_INDEXING_BIN"/>
|
|
110 <option value="MISSING_VERSION_NUMBER"/>
|
|
111 <option value="INVALID_VERSION_NUMBER"/>
|
|
112 <option value="TRUNCATED_FILE"/>
|
|
113 <option value="MISMATCH_READ_LENGTH_AND_QUALS_LENGTH"/>
|
|
114 <option value="EMPTY_READ"/>
|
|
115 <option value="CIGAR_MAPS_OFF_REFERENCE"/>
|
|
116 <option value="MISMATCH_READ_LENGTH_AND_E2_LENGTH"/>
|
|
117 <option value="MISMATCH_READ_LENGTH_AND_U2_LENGTH"/>
|
|
118 <option value="E2_BASE_EQUALS_PRIMARY_BASE"/>
|
|
119 <option value="BAM_FILE_MISSING_TERMINATOR_BLOCK"/>
|
|
120 <option value="UNRECOGNIZED_HEADER_TYPE"/>
|
|
121 <option value="POORLY_FORMATTED_HEADER_TAG"/>
|
|
122 <option value="HEADER_TAG_MULTIPLY_DEFINED"/>
|
|
123 <option value="HEADER_RECORD_MISSING_REQUIRED_TAG"/>
|
|
124 <option value="INVALID_DATE_STRING"/>
|
|
125 <option value="TAG_VALUE_TOO_LARGE"/>
|
|
126 <option value="INVALID_INDEX_FILE_POINTER"/>
|
|
127 <option value="INVALID_PREDICTED_MEDIAN_INSERT_SIZE"/>
|
|
128 <option value="DUPLICATE_READ_GROUP_ID"/>
|
|
129 <option value="MISSING_PLATFORM_VALUE"/>
|
|
130 <option value="INVALID_PLATFORM_VALUE"/>
|
|
131 <option value="DUPLICATE_PROGRAM_GROUP_ID"/>
|
|
132 <option value="MATE_NOT_FOUND"/>
|
|
133 <option value="MATES_ARE_SAME_END"/>
|
|
134 <option value="MISMATCH_MATE_CIGAR_STRING"/>
|
|
135 <option value="MATE_CIGAR_STRING_INVALID_PRESENCE"/>
|
|
136 </param>
|
|
137 <param name="max_output" type="integer" value="100" label="The maximum number of lines output in verbose mode" help="MAX_OUTPUT; default=100"/>
|
|
138 <param name="ignore_warnings" type="boolean" label="If true, only report errors and ignore warnings" help="IGNORE_WARNINGS; default=False"/>
|
|
139 <param name="validate_index" type="boolean" checked="True" label="If true and input is a BAM file with an index file, also validates the index" help="VALIDATE_INDEX; default=True"/>
|
|
140 <param name="is_bisulfite_sequenced" type="boolean" label="Whether the SAM or BAM file consists of bisulfite sequenced reads" help="IS_BISULFITE_SEQUENCED; If so, C->T is not counted as an error in computing the value of the NM tag; default=False"/>
|
|
141
|
|
142 </inputs>
|
|
143
|
|
144 <outputs>
|
|
145 <data format="txt" name="outFile" label="${tool.name} on ${on_string}: BAM validation summary"/>
|
|
146 </outputs>
|
|
147
|
|
148 <tests>
|
|
149 <test>
|
|
150 <param name="inputFile" value="picard_ValidateSamFile.bam" ftype="bam"/>
|
|
151 <param name="reference_source_selector" value="history"/>
|
|
152 <param name="ref_file" value="picard_ValidateSamFile_ref.fa"/>
|
|
153 <param name="mode" value="VERBOSE"/>
|
|
154 <param name="ignore" value="INVALID_QUALITY_FORMAT,INVALID_FLAG_PROPER_PAIR"/>
|
|
155 <param name="max_output" value="100"/>
|
|
156 <param name="ignore_warnings" value="False"/>
|
|
157 <param name="validate_index" value="True"/>
|
|
158 <param name="is_bisulfite_sequenced" value="False"/>
|
|
159 <output name="outFile" file="picard_ValidateSamFile_test1.txt" ftype="txt"/>
|
|
160 </test>
|
|
161 </tests>
|
|
162
|
|
163 <stdio>
|
|
164 <exit_code range="1:" level="warning"/>
|
|
165 </stdio>
|
|
166
|
|
167 <help>
|
|
168
|
|
169 **Purpose**
|
|
170
|
|
171 Reads a SAM/BAM dataset and reports on its validity.
|
|
172
|
|
173 @dataset_collections@
|
|
174
|
|
175 @description@
|
|
176
|
|
177 MODE=Mode
|
|
178 M=Mode Mode of output Default value: VERBOSE. This option can be set to 'null' to clear the
|
|
179 default value. Possible values: {VERBOSE, SUMMARY}
|
|
180
|
|
181 IGNORE=Type List of validation error types to ignore. Possible values: {INVALID_QUALITY_FORMAT,
|
|
182 INVALID_FLAG_PROPER_PAIR, INVALID_FLAG_MATE_UNMAPPED, MISMATCH_FLAG_MATE_UNMAPPED,
|
|
183 INVALID_FLAG_MATE_NEG_STRAND, MISMATCH_FLAG_MATE_NEG_STRAND, INVALID_FLAG_FIRST_OF_PAIR,
|
|
184 INVALID_FLAG_SECOND_OF_PAIR, PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND,
|
|
185 INVALID_FLAG_NOT_PRIM_ALIGNMENT, INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT,
|
|
186 INVALID_FLAG_READ_UNMAPPED, INVALID_INSERT_SIZE, INVALID_MAPPING_QUALITY, INVALID_CIGAR,
|
|
187 ADJACENT_INDEL_IN_CIGAR, INVALID_MATE_REF_INDEX, MISMATCH_MATE_REF_INDEX,
|
|
188 INVALID_REFERENCE_INDEX, INVALID_ALIGNMENT_START, MISMATCH_MATE_ALIGNMENT_START,
|
|
189 MATE_FIELD_MISMATCH, INVALID_TAG_NM, MISSING_TAG_NM, MISSING_HEADER,
|
|
190 MISSING_SEQUENCE_DICTIONARY, MISSING_READ_GROUP, RECORD_OUT_OF_ORDER,
|
|
191 READ_GROUP_NOT_FOUND, RECORD_MISSING_READ_GROUP, INVALID_INDEXING_BIN,
|
|
192 MISSING_VERSION_NUMBER, INVALID_VERSION_NUMBER, TRUNCATED_FILE,
|
|
193 MISMATCH_READ_LENGTH_AND_QUALS_LENGTH, EMPTY_READ, CIGAR_MAPS_OFF_REFERENCE,
|
|
194 MISMATCH_READ_LENGTH_AND_E2_LENGTH, MISMATCH_READ_LENGTH_AND_U2_LENGTH,
|
|
195 E2_BASE_EQUALS_PRIMARY_BASE, BAM_FILE_MISSING_TERMINATOR_BLOCK, UNRECOGNIZED_HEADER_TYPE,
|
|
196 POORLY_FORMATTED_HEADER_TAG, HEADER_TAG_MULTIPLY_DEFINED,
|
|
197 HEADER_RECORD_MISSING_REQUIRED_TAG, INVALID_DATE_STRING, TAG_VALUE_TOO_LARGE,
|
|
198 INVALID_INDEX_FILE_POINTER, INVALID_PREDICTED_MEDIAN_INSERT_SIZE,
|
|
199 DUPLICATE_READ_GROUP_ID, MISSING_PLATFORM_VALUE, INVALID_PLATFORM_VALUE,
|
|
200 DUPLICATE_PROGRAM_GROUP_ID, MATE_NOT_FOUND, MATES_ARE_SAME_END,
|
|
201 MISMATCH_MATE_CIGAR_STRING, MATE_CIGAR_STRING_INVALID_PRESENCE} This option may be
|
|
202 specified 0 or more times.
|
|
203
|
|
204 MAX_OUTPUT=Integer
|
|
205 MO=Integer The maximum number of lines output in verbose mode Default value: 100. This option can
|
|
206 be set to 'null' to clear the default value.
|
|
207
|
|
208 REFERENCE_SEQUENCE=File
|
|
209 R=File Reference sequence file, the NM tag check will be skipped if this is missing Default
|
|
210 value: null.
|
|
211
|
|
212 IGNORE_WARNINGS=Boolean If true, only report errors and ignore warnings. Default value: false. This option can
|
|
213 be set to 'null' to clear the default value. Possible values: {true, false}
|
|
214
|
|
215 VALIDATE_INDEX=Boolean If true and input is a BAM file with an index file, also validates the index. Default
|
|
216 value: true. This option can be set to 'null' to clear the default value. Possible
|
|
217 values: {true, false}
|
|
218
|
|
219 IS_BISULFITE_SEQUENCED=Boolean
|
|
220 BISULFITE=Boolean Whether the SAM or BAM file consists of bisulfite sequenced reads. If so, C->T is not
|
|
221 counted as an error in computing the value of the NM tag. Default value: false. This
|
|
222 option can be set to 'null' to clear the default value. Possible values: {true, false}
|
|
223
|
|
224 @more_info@
|
|
225
|
|
226 </help>
|
|
227 </tool>
|
|
228
|
|
229
|