diff picard_ValidateSamFile.xml @ 5:3d4f1fa26f0e draft

Uploaded
author devteam
date Tue, 16 Dec 2014 19:03:21 -0500
parents
children 379fdf4c41ac
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/picard_ValidateSamFile.xml	Tue Dec 16 19:03:21 2014 -0500
@@ -0,0 +1,230 @@
+<tool name="ValidateSamFile" id="picard_ValidateSamFile" version="1.122.0">
+  <description>assess validity of SAM/BAM dataset</description>
+  <requirements>
+    <requirement type="package" version="1.122.0">picard</requirement>
+  </requirements>
+  
+  <macros>
+    <import>picard_macros.xml</import>
+  </macros>
+  
+  <command>
+    
+    ##set the maximum number of open file to hard maximum or 4096 if on a mac (mac gives 'unlimited' as output of `ulimit -Hn` command
+    
+    [ `ulimit -Hn` = unlimited ] &amp;&amp; ulimit -Sn 4096 || ulimit -Sn `ulimit -Hn`
+    
+    &amp;&amp;
+    
+    ##set up input files
+
+    #set $reference_fasta_filename = "localref.fa"
+    
+    #if str( $reference_source.reference_source_selector ) == "history":
+        ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
+    #else:
+        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+    #end if
+    
+    @java_options@
+    
+    java -jar \$JAVA_JAR_PATH/picard.jar
+    ValidateSamFile
+    
+    INPUT="${inputFile}"
+    OUTPUT="${outFile}"
+    MODE="${mode}"
+    
+    #if str( $ignore ) != "None":
+      #for $element in str( $ignore ).split(','):   ## See trello card https://trello.com/c/9nW02Zhd
+        IGNORE="${element}"
+      #end for
+    #end if
+    
+    MAX_OUTPUT="${max_output}"
+    REFERENCE_SEQUENCE="${reference_fasta_filename}"
+    IGNORE_WARNINGS="${ignore_warnings}"
+    IS_BISULFITE_SEQUENCED="${is_bisulfite_sequenced}"
+    MAX_OPEN_TEMP_FILES=`ulimit -Sn`
+     
+    VERBOSITY=ERROR
+    QUIET=true
+   
+  </command>
+  <inputs>
+    <param format="sam,bam" name="inputFile" type="data" label="SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset." />
+     <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Load reference genome from">
+        <option value="cached">Local cache</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <param name="ref_file" type="select" label="Use dictionary from the list" help="Select genome from the list">
+          <options from_data_table="picard_indexes">
+            <filter type="sort_by" column="2" />
+            <validator type="no_options" message="No indexes are available" />
+          </options>
+          <validator type="no_options" message="A built-in dictionary is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history"> 
+        <param name="ref_file" type="data" format="fasta" label="Use the following dataset to create dictionary" help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command" />
+      </when>
+    </conditional>
+     
+    <param name="mode" type="select" label="Select output mode" help="MODE">
+      <option value="VERBOSE">Verbose</option>
+      <option value="SUMMARY">Summary</option>
+    </param>
+    <param name="ignore" type="select" multiple="true" display="checkboxes" label="Select validation error types to ignore" help="IGNORE">
+      <option value="INVALID_QUALITY_FORMAT">INVALID_QUALITY_FORMAT</option>
+      <option value="INVALID_FLAG_PROPER_PAIR">INVALID_FLAG_PROPER_PAIR</option>
+      <option value="INVALID_FLAG_MATE_UNMAPPED"/>
+      <option value="MISMATCH_FLAG_MATE_UNMAPPED"/>
+      <option value="INVALID_FLAG_MATE_NEG_STRAND"/>
+      <option value="MISMATCH_FLAG_MATE_NEG_STRAND"/>
+      <option value="INVALID_FLAG_FIRST_OF_PAIR"/>
+      <option value="INVALID_FLAG_SECOND_OF_PAIR"/>
+      <option value="PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND"/>
+      <option value="INVALID_FLAG_NOT_PRIM_ALIGNMENT"/>
+      <option value="INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT"/>
+      <option value="INVALID_FLAG_READ_UNMAPPED"/>
+      <option value="INVALID_INSERT_SIZE"/>
+      <option value="INVALID_MAPPING_QUALITY"/>
+      <option value="INVALID_CIGAR"/>
+      <option value="ADJACENT_INDEL_IN_CIGAR"/>
+      <option value="INVALID_MATE_REF_INDEX"/>
+      <option value="MISMATCH_MATE_REF_INDEX"/>
+      <option value="INVALID_REFERENCE_INDEX"/>
+      <option value="INVALID_ALIGNMENT_START"/>
+      <option value="MISMATCH_MATE_ALIGNMENT_START"/>
+      <option value="MATE_FIELD_MISMATCH"/>
+      <option value="INVALID_TAG_NM"/>
+      <option value="MISSING_TAG_NM"/>
+      <option value="MISSING_HEADER"/>
+      <option value="MISSING_SEQUENCE_DICTIONARY"/>
+      <option value="MISSING_READ_GROUP"/>
+      <option value="RECORD_OUT_OF_ORDER"/>
+      <option value="READ_GROUP_NOT_FOUND"/>
+      <option value="RECORD_MISSING_READ_GROUP"/>
+      <option value="INVALID_INDEXING_BIN"/>
+      <option value="MISSING_VERSION_NUMBER"/>
+      <option value="INVALID_VERSION_NUMBER"/>
+      <option value="TRUNCATED_FILE"/>
+      <option value="MISMATCH_READ_LENGTH_AND_QUALS_LENGTH"/>
+      <option value="EMPTY_READ"/>
+      <option value="CIGAR_MAPS_OFF_REFERENCE"/>
+      <option value="MISMATCH_READ_LENGTH_AND_E2_LENGTH"/>
+      <option value="MISMATCH_READ_LENGTH_AND_U2_LENGTH"/>
+      <option value="E2_BASE_EQUALS_PRIMARY_BASE"/>
+      <option value="BAM_FILE_MISSING_TERMINATOR_BLOCK"/>
+      <option value="UNRECOGNIZED_HEADER_TYPE"/>
+      <option value="POORLY_FORMATTED_HEADER_TAG"/>
+      <option value="HEADER_TAG_MULTIPLY_DEFINED"/>
+      <option value="HEADER_RECORD_MISSING_REQUIRED_TAG"/>
+      <option value="INVALID_DATE_STRING"/>
+      <option value="TAG_VALUE_TOO_LARGE"/>
+      <option value="INVALID_INDEX_FILE_POINTER"/>
+      <option value="INVALID_PREDICTED_MEDIAN_INSERT_SIZE"/>
+      <option value="DUPLICATE_READ_GROUP_ID"/>
+      <option value="MISSING_PLATFORM_VALUE"/>
+      <option value="INVALID_PLATFORM_VALUE"/>
+      <option value="DUPLICATE_PROGRAM_GROUP_ID"/>
+      <option value="MATE_NOT_FOUND"/>
+      <option value="MATES_ARE_SAME_END"/>
+      <option value="MISMATCH_MATE_CIGAR_STRING"/>
+      <option value="MATE_CIGAR_STRING_INVALID_PRESENCE"/>
+    </param>
+    <param name="max_output" type="integer" value="100" label="The maximum number of lines output in verbose mode" help="MAX_OUTPUT; default=100"/>
+    <param name="ignore_warnings" type="boolean" label="If true, only report errors and ignore warnings" help="IGNORE_WARNINGS; default=False"/>
+    <param name="validate_index" type="boolean" checked="True" label="If true and input is a BAM file with an index file, also validates the index" help="VALIDATE_INDEX; default=True"/>
+    <param name="is_bisulfite_sequenced" type="boolean" label="Whether the SAM or BAM file consists of bisulfite sequenced reads" help="IS_BISULFITE_SEQUENCED; If so, C->T is not counted as an error in computing the value of the NM tag; default=False"/>
+    
+  </inputs> 
+  
+  <outputs>
+    <data format="txt" name="outFile" label="${tool.name} on ${on_string}: BAM validation summary"/>
+  </outputs>
+  
+  <tests>
+    <test>
+      <param name="inputFile" value="picard_ValidateSamFile.bam" ftype="bam"/>
+      <param name="reference_source_selector" value="history"/>
+      <param name="ref_file" value="picard_ValidateSamFile_ref.fa"/>
+      <param name="mode" value="VERBOSE"/>
+      <param name="ignore" value="INVALID_QUALITY_FORMAT,INVALID_FLAG_PROPER_PAIR"/>
+      <param name="max_output" value="100"/>
+      <param name="ignore_warnings" value="Fasle"/>
+      <param name="validate_index" value="True"/>
+      <param name="is_bisulfite_sequenced" value="False"/>
+      <output name="outFile" file="picard_ValidateSamFile_test1.txt" ftype="txt"/>
+    </test>
+  </tests>
+  
+  <stdio>
+    <exit_code range="1:"  level="warning"/>
+  </stdio>
+  
+  <help>
+
+**Purpose**
+
+Reads a SAM/BAM dataset and report on its validity.
+
+@dataset_collections@
+
+@description@
+
+  MODE=Mode
+  M=Mode                        Mode of output  Default value: VERBOSE. This option can be set to 'null' to clear the 
+                                default value. Possible values: {VERBOSE, SUMMARY} 
+  
+  IGNORE=Type                   List of validation error types to ignore.  Possible values: {INVALID_QUALITY_FORMAT, 
+                                INVALID_FLAG_PROPER_PAIR, INVALID_FLAG_MATE_UNMAPPED, MISMATCH_FLAG_MATE_UNMAPPED, 
+                                INVALID_FLAG_MATE_NEG_STRAND, MISMATCH_FLAG_MATE_NEG_STRAND, INVALID_FLAG_FIRST_OF_PAIR, 
+                                INVALID_FLAG_SECOND_OF_PAIR, PAIRED_READ_NOT_MARKED_AS_FIRST_OR_SECOND, 
+                                INVALID_FLAG_NOT_PRIM_ALIGNMENT, INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT, 
+                                INVALID_FLAG_READ_UNMAPPED, INVALID_INSERT_SIZE, INVALID_MAPPING_QUALITY, INVALID_CIGAR, 
+                                ADJACENT_INDEL_IN_CIGAR, INVALID_MATE_REF_INDEX, MISMATCH_MATE_REF_INDEX, 
+                                INVALID_REFERENCE_INDEX, INVALID_ALIGNMENT_START, MISMATCH_MATE_ALIGNMENT_START, 
+                                MATE_FIELD_MISMATCH, INVALID_TAG_NM, MISSING_TAG_NM, MISSING_HEADER, 
+                                MISSING_SEQUENCE_DICTIONARY, MISSING_READ_GROUP, RECORD_OUT_OF_ORDER, 
+                                READ_GROUP_NOT_FOUND, RECORD_MISSING_READ_GROUP, INVALID_INDEXING_BIN, 
+                                MISSING_VERSION_NUMBER, INVALID_VERSION_NUMBER, TRUNCATED_FILE, 
+                                MISMATCH_READ_LENGTH_AND_QUALS_LENGTH, EMPTY_READ, CIGAR_MAPS_OFF_REFERENCE, 
+                                MISMATCH_READ_LENGTH_AND_E2_LENGTH, MISMATCH_READ_LENGTH_AND_U2_LENGTH, 
+                                E2_BASE_EQUALS_PRIMARY_BASE, BAM_FILE_MISSING_TERMINATOR_BLOCK, UNRECOGNIZED_HEADER_TYPE, 
+                                POORLY_FORMATTED_HEADER_TAG, HEADER_TAG_MULTIPLY_DEFINED, 
+                                HEADER_RECORD_MISSING_REQUIRED_TAG, INVALID_DATE_STRING, TAG_VALUE_TOO_LARGE, 
+                                INVALID_INDEX_FILE_POINTER, INVALID_PREDICTED_MEDIAN_INSERT_SIZE, 
+                                DUPLICATE_READ_GROUP_ID, MISSING_PLATFORM_VALUE, INVALID_PLATFORM_VALUE, 
+                                DUPLICATE_PROGRAM_GROUP_ID, MATE_NOT_FOUND, MATES_ARE_SAME_END, 
+                                MISMATCH_MATE_CIGAR_STRING, MATE_CIGAR_STRING_INVALID_PRESENCE} This option may be 
+                                specified 0 or more times. 
+  
+  MAX_OUTPUT=Integer
+  MO=Integer                    The maximum number of lines output in verbose mode  Default value: 100. This option can 
+                                be set to 'null' to clear the default value. 
+  
+  REFERENCE_SEQUENCE=File
+  R=File                        Reference sequence file, the NM tag check will be skipped if this is missing  Default 
+                                value: null. 
+  
+  IGNORE_WARNINGS=Boolean       If true, only report errors and ignore warnings.  Default value: false. This option can 
+                                be set to 'null' to clear the default value. Possible values: {true, false} 
+  
+  VALIDATE_INDEX=Boolean        If true and input is a BAM file with an index file, also validates the index.  Default 
+                                value: true. This option can be set to 'null' to clear the default value. Possible 
+                                values: {true, false} 
+  
+  IS_BISULFITE_SEQUENCED=Boolean
+  BISULFITE=Boolean             Whether the SAM or BAM file consists of bisulfite sequenced reads. If so, C->T is not 
+                                counted as an error in computing the value of the NM tag.  Default value: false. This 
+                                option can be set to 'null' to clear the default value. Possible values: {true, false} 
+  
+@more_info@
+
+  </help>
+</tool>
+
+