Mercurial > repos > avowinkel > picard
diff picard_ValidateSamFile.xml @ 0:5166ed57b1c4 draft
Uploaded version 1.135
author | avowinkel |
---|---|
date | Mon, 06 Jul 2015 14:46:32 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/picard_ValidateSamFile.xml Mon Jul 06 14:46:32 2015 -0400 @@ -0,0 +1,226 @@ +<tool name="ValidateSamFile" id="picard_ValidateSamFile" version="1.135"> + <description>assess validity of SAM/BAM dataset</description> + <macros> + <import>picard_macros.xml</import> + </macros> + <expand macro="requirements" /> + <command> + + ##set the maximum number of open file to hard maximum or 4096 if on a mac (mac gives 'unlimited' as output of `ulimit -Hn` command + + [ `ulimit -Hn` = unlimited ] && ulimit -Sn 4096 || ulimit -Sn `ulimit -Hn` + + && + + ##set up input files + + #set $reference_fasta_filename = "localref.fa" + + #if str( $reference_source.reference_source_selector ) == "history": + ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && + #else: + #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) + #end if + + @java_options@ + + java -jar \$JAVA_JAR_PATH/picard.jar + ValidateSamFile + + INPUT="${inputFile}" + OUTPUT="${outFile}" + MODE="${mode}" + + #if str( $ignore ) != "None": + #for $element in str( $ignore ).split(','): ## See trello card https://trello.com/c/9nW02Zhd + IGNORE="${element}" + #end for + #end if + + MAX_OUTPUT="${max_output}" + REFERENCE_SEQUENCE="${reference_fasta_filename}" + IGNORE_WARNINGS="${ignore_warnings}" + IS_BISULFITE_SEQUENCED="${is_bisulfite_sequenced}" + MAX_OPEN_TEMP_FILES=`ulimit -Sn` + + VERBOSITY=ERROR + QUIET=true + + </command> + <inputs> + <param format="sam,bam" name="inputFile" type="data" label="SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset." /> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Load reference genome from"> + <option value="cached">Local cache</option> + <option value="history">History</option> + </param> + <when value="cached"> + <param name="ref_file" type="select" label="Use dictionary from the list" help="Select genome from the list"> + <options from_data_table="picard_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + <validator type="no_options" message="A built-in dictionary is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset to create dictionary" help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command" /> + </when> + </conditional> + + <param name="mode" type="select" label="Select output mode" help="MODE"> + <option value="VERBOSE">Verbose</option> + <option value="SUMMARY">Summary</option> + </param> + <param name="ignore" type="select" multiple="true" display="checkboxes" label="Select validation error types to ignore" help="IGNORE"> + <option value="INVALID_QUALITY_FORMAT">INVALID_QUALITY_FORMAT</option> + <option value="INVALID_FLAG_PROPER_PAIR">INVALID_FLAG_PROPER_PAIR</option> + <option value="INVALID_FLAG_MATE_UNMAPPED"/> + <option value="MISMATCH_FLAG_MATE_UNMAPPED"/> + <option value="INVALID_FLAG_MATE_NEG_STRAND"/> + <option value="MISMATCH_FLAG_MATE_NEG_STRAND"/> + <option value="INVALID_FLAG_FIRST_OF_PAIR"/> + <option value="INVALID_FLAG_SECOND_OF_PAIR"/> + <option value="PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND"/> + <option value="INVALID_FLAG_NOT_PRIM_ALIGNMENT"/> + <option value="INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT"/> + <option value="INVALID_FLAG_READ_UNMAPPED"/> + <option value="INVALID_INSERT_SIZE"/> + <option value="INVALID_MAPPING_QUALITY"/> + <option value="INVALID_CIGAR"/> + <option value="ADJACENT_INDEL_IN_CIGAR"/> + <option value="INVALID_MATE_REF_INDEX"/> + <option value="MISMATCH_MATE_REF_INDEX"/> + <option value="INVALID_REFERENCE_INDEX"/> + <option value="INVALID_ALIGNMENT_START"/> + <option value="MISMATCH_MATE_ALIGNMENT_START"/> + <option value="MATE_FIELD_MISMATCH"/> + <option value="INVALID_TAG_NM"/> + <option value="MISSING_TAG_NM"/> + <option value="MISSING_HEADER"/> + <option value="MISSING_SEQUENCE_DICTIONARY"/> + <option value="MISSING_READ_GROUP"/> + <option value="RECORD_OUT_OF_ORDER"/> + <option value="READ_GROUP_NOT_FOUND"/> + <option value="RECORD_MISSING_READ_GROUP"/> + <option value="INVALID_INDEXING_BIN"/> + <option value="MISSING_VERSION_NUMBER"/> + <option value="INVALID_VERSION_NUMBER"/> + <option value="TRUNCATED_FILE"/> + <option value="MISMATCH_READ_LENGTH_AND_QUALS_LENGTH"/> + <option value="EMPTY_READ"/> + <option value="CIGAR_MAPS_OFF_REFERENCE"/> + <option value="MISMATCH_READ_LENGTH_AND_E2_LENGTH"/> + <option value="MISMATCH_READ_LENGTH_AND_U2_LENGTH"/> + <option value="E2_BASE_EQUALS_PRIMARY_BASE"/> + <option value="BAM_FILE_MISSING_TERMINATOR_BLOCK"/> + <option value="UNRECOGNIZED_HEADER_TYPE"/> + <option value="POORLY_FORMATTED_HEADER_TAG"/> + <option value="HEADER_TAG_MULTIPLY_DEFINED"/> + <option value="HEADER_RECORD_MISSING_REQUIRED_TAG"/> + <option value="INVALID_DATE_STRING"/> + <option value="TAG_VALUE_TOO_LARGE"/> + <option value="INVALID_INDEX_FILE_POINTER"/> + <option value="INVALID_PREDICTED_MEDIAN_INSERT_SIZE"/> + <option value="DUPLICATE_READ_GROUP_ID"/> + <option value="MISSING_PLATFORM_VALUE"/> + <option value="INVALID_PLATFORM_VALUE"/> + <option value="DUPLICATE_PROGRAM_GROUP_ID"/> + <option value="MATE_NOT_FOUND"/> + <option value="MATES_ARE_SAME_END"/> + <option value="MISMATCH_MATE_CIGAR_STRING"/> + <option value="MATE_CIGAR_STRING_INVALID_PRESENCE"/> + </param> + <param name="max_output" type="integer" value="100" label="The maximum number of lines output in verbose mode" help="MAX_OUTPUT; default=100"/> + <param name="ignore_warnings" type="boolean" label="If true, only report errors and ignore warnings" help="IGNORE_WARNINGS; default=False"/> + <param name="validate_index" type="boolean" checked="True" label="If true and input is a BAM file with an index file, also validates the index" help="VALIDATE_INDEX; default=True"/> + <param name="is_bisulfite_sequenced" type="boolean" label="Whether the SAM or BAM file consists of bisulfite sequenced reads" help="IS_BISULFITE_SEQUENCED; If so, C->T is not counted as an error in computing the value of the NM tag; default=False"/> + + </inputs> + + <outputs> + <data format="txt" name="outFile" label="${tool.name} on ${on_string}: BAM validation summary"/> + </outputs> + + <tests> + <test> + <param name="inputFile" value="picard_ValidateSamFile.bam" ftype="bam"/> + <param name="reference_source_selector" value="history"/> + <param name="ref_file" value="picard_ValidateSamFile_ref.fa"/> + <param name="mode" value="VERBOSE"/> + <param name="ignore" value="INVALID_QUALITY_FORMAT,INVALID_FLAG_PROPER_PAIR"/> + <param name="max_output" value="100"/> + <param name="ignore_warnings" value="Fasle"/> + <param name="validate_index" value="True"/> + <param name="is_bisulfite_sequenced" value="False"/> + <output name="outFile" file="picard_ValidateSamFile_test1.txt" ftype="txt"/> + </test> + </tests> + + <stdio> + <exit_code range="1:" level="warning"/> + </stdio> + + <help> + +**Purpose** + +Reads a SAM/BAM dataset and report on its validity. + +@dataset_collections@ + +@description@ + + MODE=Mode + M=Mode Mode of output Default value: VERBOSE. This option can be set to 'null' to clear the + default value. Possible values: {VERBOSE, SUMMARY} + + IGNORE=Type List of validation error types to ignore. Possible values: {INVALID_QUALITY_FORMAT, + INVALID_FLAG_PROPER_PAIR, INVALID_FLAG_MATE_UNMAPPED, MISMATCH_FLAG_MATE_UNMAPPED, + INVALID_FLAG_MATE_NEG_STRAND, MISMATCH_FLAG_MATE_NEG_STRAND, INVALID_FLAG_FIRST_OF_PAIR, + INVALID_FLAG_SECOND_OF_PAIR, PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND, + INVALID_FLAG_NOT_PRIM_ALIGNMENT, INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT, + INVALID_FLAG_READ_UNMAPPED, INVALID_INSERT_SIZE, INVALID_MAPPING_QUALITY, INVALID_CIGAR, + ADJACENT_INDEL_IN_CIGAR, INVALID_MATE_REF_INDEX, MISMATCH_MATE_REF_INDEX, + INVALID_REFERENCE_INDEX, INVALID_ALIGNMENT_START, MISMATCH_MATE_ALIGNMENT_START, + MATE_FIELD_MISMATCH, INVALID_TAG_NM, MISSING_TAG_NM, MISSING_HEADER, + MISSING_SEQUENCE_DICTIONARY, MISSING_READ_GROUP, RECORD_OUT_OF_ORDER, + READ_GROUP_NOT_FOUND, RECORD_MISSING_READ_GROUP, INVALID_INDEXING_BIN, + MISSING_VERSION_NUMBER, INVALID_VERSION_NUMBER, TRUNCATED_FILE, + MISMATCH_READ_LENGTH_AND_QUALS_LENGTH, EMPTY_READ, CIGAR_MAPS_OFF_REFERENCE, + MISMATCH_READ_LENGTH_AND_E2_LENGTH, MISMATCH_READ_LENGTH_AND_U2_LENGTH, + E2_BASE_EQUALS_PRIMARY_BASE, BAM_FILE_MISSING_TERMINATOR_BLOCK, UNRECOGNIZED_HEADER_TYPE, + POORLY_FORMATTED_HEADER_TAG, HEADER_TAG_MULTIPLY_DEFINED, + HEADER_RECORD_MISSING_REQUIRED_TAG, INVALID_DATE_STRING, TAG_VALUE_TOO_LARGE, + INVALID_INDEX_FILE_POINTER, INVALID_PREDICTED_MEDIAN_INSERT_SIZE, + DUPLICATE_READ_GROUP_ID, MISSING_PLATFORM_VALUE, INVALID_PLATFORM_VALUE, + DUPLICATE_PROGRAM_GROUP_ID, MATE_NOT_FOUND, MATES_ARE_SAME_END, + MISMATCH_MATE_CIGAR_STRING, MATE_CIGAR_STRING_INVALID_PRESENCE} This option may be + specified 0 or more times. + + MAX_OUTPUT=Integer + MO=Integer The maximum number of lines output in verbose mode Default value: 100. This option can + be set to 'null' to clear the default value. + + REFERENCE_SEQUENCE=File + R=File Reference sequence file, the NM tag check will be skipped if this is missing Default + value: null. + + IGNORE_WARNINGS=Boolean If true, only report errors and ignore warnings. Default value: false. This option can + be set to 'null' to clear the default value. Possible values: {true, false} + + VALIDATE_INDEX=Boolean If true and input is a BAM file with an index file, also validates the index. Default + value: true. This option can be set to 'null' to clear the default value. Possible + values: {true, false} + + IS_BISULFITE_SEQUENCED=Boolean + BISULFITE=Boolean Whether the SAM or BAM file consists of bisulfite sequenced reads. If so, C->T is not + counted as an error in computing the value of the NM tag. Default value: false. This + option can be set to 'null' to clear the default value. Possible values: {true, false} + +@more_info@ + + </help> +</tool> + +