annotate picard_CollectInsertSizeMetrics.xml @ 0:4419e9980172 draft

Uploaded
author devteam
date Thu, 23 Oct 2014 12:03:34 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4419e9980172 Uploaded
devteam
parents:
diff changeset
1 <tool name="CollectInsertSizeMetrics" id="picard_CollectInsertSizeMetrics" version="1.122.0">
4419e9980172 Uploaded
devteam
parents:
diff changeset
2 <description>plots distribution of insert sizes</description>
4419e9980172 Uploaded
devteam
parents:
diff changeset
3 <requirements>
4419e9980172 Uploaded
devteam
parents:
diff changeset
4 <requirement type="package" version="1.122.0">picard</requirement>
4419e9980172 Uploaded
devteam
parents:
diff changeset
5 </requirements>
4419e9980172 Uploaded
devteam
parents:
diff changeset
6
4419e9980172 Uploaded
devteam
parents:
diff changeset
7 <macros>
4419e9980172 Uploaded
devteam
parents:
diff changeset
8 <import>picard_macros.xml</import>
4419e9980172 Uploaded
devteam
parents:
diff changeset
9 </macros>
4419e9980172 Uploaded
devteam
parents:
diff changeset
10
4419e9980172 Uploaded
devteam
parents:
diff changeset
11 <command>
4419e9980172 Uploaded
devteam
parents:
diff changeset
12 @java_options@
4419e9980172 Uploaded
devteam
parents:
diff changeset
13 ##set up input files
4419e9980172 Uploaded
devteam
parents:
diff changeset
14
4419e9980172 Uploaded
devteam
parents:
diff changeset
15 #set $reference_fasta_filename = "localref.fa"
4419e9980172 Uploaded
devteam
parents:
diff changeset
16
4419e9980172 Uploaded
devteam
parents:
diff changeset
17 #if str( $reference_source.reference_source_selector ) == "history":
4419e9980172 Uploaded
devteam
parents:
diff changeset
18 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
4419e9980172 Uploaded
devteam
parents:
diff changeset
19 #else:
4419e9980172 Uploaded
devteam
parents:
diff changeset
20 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
4419e9980172 Uploaded
devteam
parents:
diff changeset
21 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
22
4419e9980172 Uploaded
devteam
parents:
diff changeset
23 java -jar \$JAVA_JAR_PATH/CollectInsertSizeMetrics.jar
4419e9980172 Uploaded
devteam
parents:
diff changeset
24 INPUT="${inputFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
25 OUTPUT="${outFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
26 HISTOGRAM_FILE="${histFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
27 DEVIATIONS="${deviations}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
28
4419e9980172 Uploaded
devteam
parents:
diff changeset
29 #if str( $hist_width ):
4419e9980172 Uploaded
devteam
parents:
diff changeset
30 HISTOGRAM_WIDTH="${hist_width}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
31 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
32
4419e9980172 Uploaded
devteam
parents:
diff changeset
33 MINIMUM_PCT="${min_pct}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
34 REFERENCE_SEQUENCE="${reference_fasta_filename}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
35 ASSUME_SORTED="${assume_sorted}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
36 METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
37
4419e9980172 Uploaded
devteam
parents:
diff changeset
38 VALIDATION_STRINGENCY="${validation_stringency}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
39 QUIET=true
4419e9980172 Uploaded
devteam
parents:
diff changeset
40 VERBOSITY=ERROR
4419e9980172 Uploaded
devteam
parents:
diff changeset
41
4419e9980172 Uploaded
devteam
parents:
diff changeset
42 </command>
4419e9980172 Uploaded
devteam
parents:
diff changeset
43 <inputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
44 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset."/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
45 <conditional name="reference_source">
4419e9980172 Uploaded
devteam
parents:
diff changeset
46 <param name="reference_source_selector" type="select" label="Load reference genome from">
4419e9980172 Uploaded
devteam
parents:
diff changeset
47 <option value="cached">Local cache</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
48 <option value="history">History</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
49 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
50 <when value="cached">
4419e9980172 Uploaded
devteam
parents:
diff changeset
51 <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
4419e9980172 Uploaded
devteam
parents:
diff changeset
52 <options from_data_table="all_fasta">
4419e9980172 Uploaded
devteam
parents:
diff changeset
53 </options>
4419e9980172 Uploaded
devteam
parents:
diff changeset
54 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
55 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
56 </when>
4419e9980172 Uploaded
devteam
parents:
diff changeset
57 <when value="history">
4419e9980172 Uploaded
devteam
parents:
diff changeset
58 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
59 </when>
4419e9980172 Uploaded
devteam
parents:
diff changeset
60 </conditional>
4419e9980172 Uploaded
devteam
parents:
diff changeset
61 <param name="deviations" type="float" value="10.0" label="Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION" help="DEVIATIONS; This option is offered because insert size data typically includes enough anomalous values from chimeras and other artifacts to make the mean and SD grossly misleading regarding the real distribution. default=10.0"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
62 <param name="hist_width" type="integer" optional="True" label="Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail" help="HISTOGRAM_WIDTH; optional"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
63 <param name="min_pct" type="float" value="0.05" label="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this percentage of overall reads" help="MINIMUM_PCT; (Range: 0 to 1). default=0.05. "/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
64 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
65 <param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL">
4419e9980172 Uploaded
devteam
parents:
diff changeset
66 <option value="ALL_READS" selected="True">All reads</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
67 <option value="SAMPLE">Sample</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
68 <option value="LIBRARY">Library</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
69 <option value="READ_GROUP">Read group</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
70 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
71
4419e9980172 Uploaded
devteam
parents:
diff changeset
72 <expand macro="VS" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
73
4419e9980172 Uploaded
devteam
parents:
diff changeset
74 </inputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
75
4419e9980172 Uploaded
devteam
parents:
diff changeset
76 <outputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
77 <data format="tabular" name="outFile"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
78 <data format="pdf" name="histFile"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
79 </outputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
80
4419e9980172 Uploaded
devteam
parents:
diff changeset
81 <tests>
4419e9980172 Uploaded
devteam
parents:
diff changeset
82 <test>
4419e9980172 Uploaded
devteam
parents:
diff changeset
83 <param name="metric_accumulation_level" value="ALL_READS"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
84 <param name="deviations" value="10.0" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
85 <param name="hist_width" value="500" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
86 <param name="min_pct" value="0.05" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
87 <param name="assume_sorted" value="true" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
88 <param name="reference_source_selector" value="history" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
89 <param name="ref_file" value="picard_CollectInsertSizeMetrics_ref.fa" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
90 <param name="inputFile" value="picard_CollectInsertSizeMetrics.bam" ftype="bam" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
91 <output name="outFile" file="picard_CollectInsertSizeMetrics_test1.tab" ftype="tabular" lines_diff="4"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
92 </test>
4419e9980172 Uploaded
devteam
parents:
diff changeset
93 </tests>
4419e9980172 Uploaded
devteam
parents:
diff changeset
94
4419e9980172 Uploaded
devteam
parents:
diff changeset
95 <stdio>
4419e9980172 Uploaded
devteam
parents:
diff changeset
96 <exit_code range="1:" level="fatal"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
97 </stdio>
4419e9980172 Uploaded
devteam
parents:
diff changeset
98
4419e9980172 Uploaded
devteam
parents:
diff changeset
99 <help>
4419e9980172 Uploaded
devteam
parents:
diff changeset
100
4419e9980172 Uploaded
devteam
parents:
diff changeset
101 .. class:: infomark
4419e9980172 Uploaded
devteam
parents:
diff changeset
102
4419e9980172 Uploaded
devteam
parents:
diff changeset
103 **Purpose**
4419e9980172 Uploaded
devteam
parents:
diff changeset
104
4419e9980172 Uploaded
devteam
parents:
diff changeset
105 Reads a SAM or BAM dataset and writes a file containing metrics about the statistical distribution of insert size (excluding duplicates) and generates a Histogram plot.
4419e9980172 Uploaded
devteam
parents:
diff changeset
106
4419e9980172 Uploaded
devteam
parents:
diff changeset
107 @dataset_collections@
4419e9980172 Uploaded
devteam
parents:
diff changeset
108
4419e9980172 Uploaded
devteam
parents:
diff changeset
109 @description@
4419e9980172 Uploaded
devteam
parents:
diff changeset
110
4419e9980172 Uploaded
devteam
parents:
diff changeset
111
4419e9980172 Uploaded
devteam
parents:
diff changeset
112 DEVIATIONS=Double Generate mean, sd and plots by trimming the data down to MEDIAN +
4419e9980172 Uploaded
devteam
parents:
diff changeset
113 DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is done because insert size data typically
4419e9980172 Uploaded
devteam
parents:
diff changeset
114 includes enough anomalous values from chimeras and other artifacts to make the mean and
4419e9980172 Uploaded
devteam
parents:
diff changeset
115 sd grossly misleading regarding the real distribution. Default value: 10.0.
4419e9980172 Uploaded
devteam
parents:
diff changeset
116
4419e9980172 Uploaded
devteam
parents:
diff changeset
117 HISTOGRAM_WIDTH=Integer
4419e9980172 Uploaded
devteam
parents:
diff changeset
118 W=Integer Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail.
4419e9980172 Uploaded
devteam
parents:
diff changeset
119 Also, when calculating mean and standard deviation, only bins &lt;= HISTOGRAM_WIDTH will be
4419e9980172 Uploaded
devteam
parents:
diff changeset
120 included. Default value: not set.
4419e9980172 Uploaded
devteam
parents:
diff changeset
121
4419e9980172 Uploaded
devteam
parents:
diff changeset
122 MINIMUM_PCT=Float
4419e9980172 Uploaded
devteam
parents:
diff changeset
123 M=Float When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that
4419e9980172 Uploaded
devteam
parents:
diff changeset
124 have fewer than this percentage of overall reads. (Range: 0 to 1). Default value: 0.05.
4419e9980172 Uploaded
devteam
parents:
diff changeset
125
4419e9980172 Uploaded
devteam
parents:
diff changeset
126 METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
4419e9980172 Uploaded
devteam
parents:
diff changeset
127 LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
4419e9980172 Uploaded
devteam
parents:
diff changeset
128 LIBRARY, READ_GROUP} This option may be specified 0 or more times.
4419e9980172 Uploaded
devteam
parents:
diff changeset
129
4419e9980172 Uploaded
devteam
parents:
diff changeset
130 ASSUME_SORTED=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
131 AS=Boolean If true (default), then the sort order in the header file will be ignored. Default
4419e9980172 Uploaded
devteam
parents:
diff changeset
132 value: true. This option can be set to 'null' to clear the default value. Possible
4419e9980172 Uploaded
devteam
parents:
diff changeset
133 values: {true, false}
4419e9980172 Uploaded
devteam
parents:
diff changeset
134
4419e9980172 Uploaded
devteam
parents:
diff changeset
135 @more_info@
4419e9980172 Uploaded
devteam
parents:
diff changeset
136
4419e9980172 Uploaded
devteam
parents:
diff changeset
137 </help>
4419e9980172 Uploaded
devteam
parents:
diff changeset
138 </tool>
4419e9980172 Uploaded
devteam
parents:
diff changeset
139
4419e9980172 Uploaded
devteam
parents:
diff changeset
140