Mercurial > repos > devteam > picard_122_up

diff picard_CollectInsertSizeMetrics.xml @ 0:b76a4f17bbbb draft
Uploaded
author: devteam
date: Thu, 23 Oct 2014 11:31:30 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/picard_CollectInsertSizeMetrics.xml	Thu Oct 23 11:31:30 2014 -0400
@@ -0,0 +1,140 @@
+<tool name="CollectInsertSizeMetrics" id="picard_CollectInsertSizeMetrics" version="1.122.0">
+  <description>plots distribution of insert sizes</description>
+  <requirements>
+    <requirement type="package" version="1.122.0">picard</requirement>
+  </requirements>
+  
+  <macros>
+    <import>picard_macros.xml</import>
+  </macros>
+  
+  <command>
+    @java_options@
+    ##set up input files
+
+    #set $reference_fasta_filename = "localref.fa"
+    
+    #if str( $reference_source.reference_source_selector ) == "history":
+        ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
+    #else:
+        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
+    #end if
+    
+    java -jar \$JAVA_JAR_PATH/CollectInsertSizeMetrics.jar
+    INPUT="${inputFile}"
+    OUTPUT="${outFile}"
+    HISTOGRAM_FILE="${histFile}"
+    DEVIATIONS="${deviations}"
+    
+    #if str( $hist_width ):
+      HISTOGRAM_WIDTH="${hist_width}"
+    #end if
+    
+    MINIMUM_PCT="${min_pct}"
+    REFERENCE_SEQUENCE="${reference_fasta_filename}"
+    ASSUME_SORTED="${assume_sorted}"
+    METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
+
+    VALIDATION_STRINGENCY="${validation_stringency}"
+    QUIET=true
+    VERBOSITY=ERROR
+  
+  </command>
+  <inputs>
+    <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset."/>
+    <conditional name="reference_source">
+      <param name="reference_source_selector" type="select" label="Load reference genome from">
+        <option value="cached">Local cache</option>
+        <option value="history">History</option>
+      </param>
+      <when value="cached">
+        <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
+          <options from_data_table="all_fasta">
+          </options>
+          <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ref_file" type="data" format="fasta" label="Use the folloing dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
+      </when>
+    </conditional>
+    <param name="deviations" type="float" value="10.0" label="Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION" help="DEVIATIONS; This option is offered because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and SD grossly misleading regarding the real distribution. default=10.0"/>
+    <param name="hist_width" type="integer" optional="True" label="Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail" help="HISTOGRAM_WIDTH; optional"/>
+    <param name="min_pct" type="float" value="0.05" label="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads" help="MINIMUM_PCT; (Range: 0 to 1). default=0.05. "/>
+    <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/>
+    <param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL">
+      <option value="ALL_READS" selected="True">All reads</option>
+      <option value="SAMPLE">Sample</option>
+      <option value="LIBRARY">Library</option>
+      <option value="READ_GROUP">Read group</option>
+    </param>
+    
+    <expand macro="VS" />
+    
+  </inputs>
+  
+  <outputs>
+    <data format="tabular" name="outFile"/>
+    <data format="pdf" name="histFile"/>
+  </outputs>
+  
+  <tests>
+    <test>
+      <param name="metric_accumulation_level" value="ALL_READS"/>
+      <param name="deviations" value="10.0" />
+      <param name="hist_width" value="500" />
+      <param name="min_pct" value="0.05" />
+      <param name="assume_sorted" value="true" />
+      <param name="reference_source_selector" value="history" />
+      <param name="ref_file" value="picard_CollectInsertSizeMetrics_ref.fa" />
+      <param name="inputFile" value="picard_CollectInsertSizeMetrics.bam" ftype="bam" />
+      <output name="outFile" file="picard_CollectInsertSizeMetrics_test1.tab" ftype="tabular" lines_diff="4"/>
+    </test>
+  </tests>
+  
+  <stdio>
+    <exit_code range="1:"  level="fatal"/>
+  </stdio>
+  
+  <help>
+
+.. class:: infomark
+
+**Purpose**
+
+Reads a SAM or BAM dataset and writes a file containing metrics about the statistical distribution of insert size (excluding duplicates) and generates a Histogram plot.
+
+@dataset_collections@
+
+@description@
+
+ 
+  DEVIATIONS=Double             Generate mean, sd and plots by trimming the data down to MEDIAN + 
+                                DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically 
+                                includes enough anomalous values from chimeras and other artifacts to make the mean and 
+                                sd grossly misleading regarding the real distribution.  Default value: 10.0. 
+
+  HISTOGRAM_WIDTH=Integer
+  W=Integer                     Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. 
+                                Also, when calculating mean and standard deviation, only bins &lt;= Histogram_WIDTH will be 
+                                included.  Default value: not set. 
+
+  MINIMUM_PCT=Float
+  M=Float                       When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that 
+                                have fewer than this percentage of overall reads. (Range: 0 to 1).  Default value: 0.05. 
+
+  METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
+  LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics.    Possible values: {ALL_READS, SAMPLE, 
+                                LIBRARY, READ_GROUP} This option may be specified 0 or more times. 
+ 
+  ASSUME_SORTED=Boolean
+  AS=Boolean                    If true (default), then the sort order in the header file will be ignored.  Default 
+                                value: true. This option can be set to 'null' to clear the default value. Possible 
+                                values: {true, false}
+                                
+@more_info@
+
+  </help>
+</tool>
+
+
author	devteam
date	Thu, 23 Oct 2014 11:31:30 -0400
parents
children