annotate picard_CollectRnaSeqMetrics.xml @ 2:4aa5b444fd42 draft default tip

Deleted selected files
author devteam
date Tue, 16 Dec 2014 16:50:01 -0500
parents 4419e9980172
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4419e9980172 Uploaded
devteam
parents:
diff changeset
1 <tool name="CollectRnaSeqMetrics" id="picard_CollectRnaSeqMetrics" version="1.122.0">
4419e9980172 Uploaded
devteam
parents:
diff changeset
2 <description> collect metrics about the alignment of RNA to various functional classes of loci in the genome</description>
4419e9980172 Uploaded
devteam
parents:
diff changeset
3 <requirements>
4419e9980172 Uploaded
devteam
parents:
diff changeset
4 <requirement type="package" version="1.122.0">picard</requirement>
4419e9980172 Uploaded
devteam
parents:
diff changeset
5 </requirements>
4419e9980172 Uploaded
devteam
parents:
diff changeset
6
4419e9980172 Uploaded
devteam
parents:
diff changeset
7 <macros>
4419e9980172 Uploaded
devteam
parents:
diff changeset
8 <import>picard_macros.xml</import>
4419e9980172 Uploaded
devteam
parents:
diff changeset
9 </macros>
4419e9980172 Uploaded
devteam
parents:
diff changeset
10
4419e9980172 Uploaded
devteam
parents:
diff changeset
11
4419e9980172 Uploaded
devteam
parents:
diff changeset
12 <command>
4419e9980172 Uploaded
devteam
parents:
diff changeset
13
4419e9980172 Uploaded
devteam
parents:
diff changeset
14 ## Set up input files
4419e9980172 Uploaded
devteam
parents:
diff changeset
15
4419e9980172 Uploaded
devteam
parents:
diff changeset
16 ## Reference sequences
4419e9980172 Uploaded
devteam
parents:
diff changeset
17
4419e9980172 Uploaded
devteam
parents:
diff changeset
18 #set $reference_fasta_filename = "localref.fa"
4419e9980172 Uploaded
devteam
parents:
diff changeset
19
4419e9980172 Uploaded
devteam
parents:
diff changeset
20 #if str( $reference_source.reference_source_selector ) == "history":
4419e9980172 Uploaded
devteam
parents:
diff changeset
21 ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" &amp;&amp;
4419e9980172 Uploaded
devteam
parents:
diff changeset
22 #else:
4419e9980172 Uploaded
devteam
parents:
diff changeset
23 #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
4419e9980172 Uploaded
devteam
parents:
diff changeset
24 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
25
4419e9980172 Uploaded
devteam
parents:
diff changeset
26 ## refFlat data
4419e9980172 Uploaded
devteam
parents:
diff changeset
27 ## The awk line below converts a file obtained from UCSC as specified in the tool help to refFlat format
4419e9980172 Uploaded
devteam
parents:
diff changeset
28
4419e9980172 Uploaded
devteam
parents:
diff changeset
29 grep -v '^#' ${refFlat} | awk '{print $11"\t"$1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7"\t"$8"\t"$9"\t"$10}' > refFlat.tab &amp;&amp;
4419e9980172 Uploaded
devteam
parents:
diff changeset
30
4419e9980172 Uploaded
devteam
parents:
diff changeset
31 ## Start picard command
4419e9980172 Uploaded
devteam
parents:
diff changeset
32
4419e9980172 Uploaded
devteam
parents:
diff changeset
33 @java_options@
4419e9980172 Uploaded
devteam
parents:
diff changeset
34 java -jar \$JAVA_JAR_PATH/CollectRnaSeqMetrics.jar
4419e9980172 Uploaded
devteam
parents:
diff changeset
35 REF_FLAT=refFlat.tab
4419e9980172 Uploaded
devteam
parents:
diff changeset
36
4419e9980172 Uploaded
devteam
parents:
diff changeset
37 #if str( $ribosomal_intervals ) != "None":
4419e9980172 Uploaded
devteam
parents:
diff changeset
38 RIBOSOMAL_INTERVALS="${ribosomal_intervals}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
39 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
40
4419e9980172 Uploaded
devteam
parents:
diff changeset
41 STRAND_SPECIFICITY="${strand_specificity}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
42 MINIMUM_LENGTH="${minimum_length}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
43 CHART_OUTPUT="${pdfFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
44
4419e9980172 Uploaded
devteam
parents:
diff changeset
45 #for $sequence_to_ignore in $ignore_list:
4419e9980172 Uploaded
devteam
parents:
diff changeset
46 IGNORE_SEQUENCE="${sequence_to_ignore.sequence}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
47 #end for
4419e9980172 Uploaded
devteam
parents:
diff changeset
48
4419e9980172 Uploaded
devteam
parents:
diff changeset
49 RRNA_FRAGMENT_PERCENTAGE="${rrna_fragment_percentage}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
50 METRIC_ACCUMULATION_LEVEL="${metric_accumulation_level}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
51 INPUT="${inputFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
52 OUTPUT="${outFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
53 REFERENCE_SEQUENCE="${reference_fasta_filename}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
54 ASSUME_SORTED="${assume_sorted}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
55
4419e9980172 Uploaded
devteam
parents:
diff changeset
56 QUIET=true
4419e9980172 Uploaded
devteam
parents:
diff changeset
57 VERBOSITY=ERROR
4419e9980172 Uploaded
devteam
parents:
diff changeset
58 VALIDATION_STRINGENCY=${validation_stringency}
4419e9980172 Uploaded
devteam
parents:
diff changeset
59
4419e9980172 Uploaded
devteam
parents:
diff changeset
60 </command>
4419e9980172 Uploaded
devteam
parents:
diff changeset
61
4419e9980172 Uploaded
devteam
parents:
diff changeset
62 <inputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
63 <param format="sam,bam" type="data" name="inputFile" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
64 <conditional name="reference_source">
4419e9980172 Uploaded
devteam
parents:
diff changeset
65 <param name="reference_source_selector" type="select" label="Load reference genome from">
4419e9980172 Uploaded
devteam
parents:
diff changeset
66 <option value="cached">Local cache</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
67 <option value="history">History</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
68 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
69 <when value="cached">
4419e9980172 Uploaded
devteam
parents:
diff changeset
70 <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
4419e9980172 Uploaded
devteam
parents:
diff changeset
71 <options from_data_table="all_fasta"></options>
4419e9980172 Uploaded
devteam
parents:
diff changeset
72 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
73 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
74 </when>
4419e9980172 Uploaded
devteam
parents:
diff changeset
75 <when value="history">
4419e9980172 Uploaded
devteam
parents:
diff changeset
76 <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
77 </when>
4419e9980172 Uploaded
devteam
parents:
diff changeset
78 </conditional>
4419e9980172 Uploaded
devteam
parents:
diff changeset
79 <param format="tabular" name="refFlat" type="data" label="Gene annotations in refFlat form" help="See &quot;Obtaining gene annotations in refFlat format&quot; below for help" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
80 <param name="ribosomal_intervals" format="picard_interval_list" type="data" optional="True" label="Location of rRNA sequences in genome, in interval_list format" help="RIBOSOMAL_INTERVALS; If not specified no bases will be identified as being ribosomal. The list of intervals can be generated from BED or Interval datasets using Galaxy BedToIntervalList tool"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
81 <param name="strand_specificity" type="select" label="What is the RNA-seq library strand specificity" help="STRAND_SPECIFICITY; For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand.">
4419e9980172 Uploaded
devteam
parents:
diff changeset
82 <option value="NONE" selected="True">None</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
83 <option value="FIRST_READ_TRANSCRIPTION_STRAND">First read transcription strand</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
84 <option value="SECOND_READ_TRANSCRIPTION_STRAND">Second read transcription strand</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
85 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
86 <param name="minimum_length" type="integer" value="500" label="When calculating coverage based values use only transcripts of this length or greater" help="MINIMUM_LENGTH; default=500"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
87 <repeat name="ignore_list" title="Sequences to ignore" min="0" help="You can provide multiple sequences by clicking the button below">
4419e9980172 Uploaded
devteam
parents:
diff changeset
88 <param name="sequence" type="text" size="80" label="Ignore reads matching this sequence"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
89 </repeat>
4419e9980172 Uploaded
devteam
parents:
diff changeset
90 <param name="rrna_fragment_percentage" type="float" value="0.8" label="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair to be considered rRNA." help="RRNA_FRAGMENT_PERCENTAGE; default=0.8"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
91 <param name="metric_accumulation_level" type="select" label="The level(s) at which to accumulate metrics" multiple="true" help="METRIC_ACCUMULATION_LEVEL">
4419e9980172 Uploaded
devteam
parents:
diff changeset
92 <option value="ALL_READS" selected="True">All reads</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
93 <option value="SAMPLE">Sample</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
94 <option value="LIBRARY">Library</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
95 <option value="READ_GROUP">Read group</option>
4419e9980172 Uploaded
devteam
parents:
diff changeset
96 </param>
4419e9980172 Uploaded
devteam
parents:
diff changeset
97 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
98
4419e9980172 Uploaded
devteam
parents:
diff changeset
99 <expand macro="VS" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
100
4419e9980172 Uploaded
devteam
parents:
diff changeset
101 </inputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
102 <outputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
103 <data format="pdf" name="pdfFile" label="${tool.name} on ${on_string}: Chart PDF"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
104 <data format="tabular" name="outFile" label="${tool.name} on ${on_string}: Summary stats"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
105 </outputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
106
4419e9980172 Uploaded
devteam
parents:
diff changeset
107 <stdio>
4419e9980172 Uploaded
devteam
parents:
diff changeset
108 <exit_code range="1:" level="fatal"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
109 </stdio>
4419e9980172 Uploaded
devteam
parents:
diff changeset
110 <tests>
4419e9980172 Uploaded
devteam
parents:
diff changeset
111 <test>
4419e9980172 Uploaded
devteam
parents:
diff changeset
112 <param name="reference_source_selector" value="history"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
113 <param name="ref_file" value="picard_CollectRnaSeqMetrics_ref.fa" ftype="fasta"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
114 <param name="inputFile" value="picard_CollectRnaSeqMetrics.bam" ftype="bam"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
115 <param name="assume_sorted" value="true" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
116 <param name="refFlat" value="picard_CollectRnaSeqMetrics.refFlat" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
117 <param name="metric_accumulation_level" value="ALL_READS" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
118 <param name="minimum_length" value="500" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
119 <param name="strand_specificity" value="NONE" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
120 <param name="rrna_fragment_percentage" value="0.8" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
121 <output name="outFile" file="picard_CollectRnaSeqMetrics_test1.tab" ftype="tabular" lines_diff="4"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
122 </test>
4419e9980172 Uploaded
devteam
parents:
diff changeset
123
4419e9980172 Uploaded
devteam
parents:
diff changeset
124 </tests>
4419e9980172 Uploaded
devteam
parents:
diff changeset
125 <help>
4419e9980172 Uploaded
devteam
parents:
diff changeset
126
4419e9980172 Uploaded
devteam
parents:
diff changeset
127 .. class:: infomark
4419e9980172 Uploaded
devteam
parents:
diff changeset
128
4419e9980172 Uploaded
devteam
parents:
diff changeset
129 **Purpose**
4419e9980172 Uploaded
devteam
parents:
diff changeset
130
4419e9980172 Uploaded
devteam
parents:
diff changeset
131 Collects metrics about the alignment of RNA to various functional classes of loci in the genome: coding, intronic, UTR, intergenic, ribosomal.
4419e9980172 Uploaded
devteam
parents:
diff changeset
132
4419e9980172 Uploaded
devteam
parents:
diff changeset
133 @dataset_collections@
4419e9980172 Uploaded
devteam
parents:
diff changeset
134
4419e9980172 Uploaded
devteam
parents:
diff changeset
135 -----
4419e9980172 Uploaded
devteam
parents:
diff changeset
136
4419e9980172 Uploaded
devteam
parents:
diff changeset
137 .. class:: warningmark
4419e9980172 Uploaded
devteam
parents:
diff changeset
138
4419e9980172 Uploaded
devteam
parents:
diff changeset
139 **Obtaining gene annotations in refFlat format**
4419e9980172 Uploaded
devteam
parents:
diff changeset
140
4419e9980172 Uploaded
devteam
parents:
diff changeset
141 This tool requires gene annotations in refFlat_ format. These data can be obtained from UCSC table browser directly through Galaxy by following these steps:
4419e9980172 Uploaded
devteam
parents:
diff changeset
142
4419e9980172 Uploaded
devteam
parents:
diff changeset
143 1. Click on **Get Data** in the upper part of left pane of Galaxy interface
4419e9980172 Uploaded
devteam
parents:
diff changeset
144 2. Click on **UCSC Main** link
4419e9980172 Uploaded
devteam
parents:
diff changeset
145 3. Set your genome and dataset of interest. It **must** be the same genome build against which you have mapped the reads contained in the BAM file you are analyzing
4419e9980172 Uploaded
devteam
parents:
diff changeset
146 4. In the **output format** field choose **selected fields from primary and related tables**
4419e9980172 Uploaded
devteam
parents:
diff changeset
147 5. Click **get output** button
4419e9980172 Uploaded
devteam
parents:
diff changeset
148 6. In the first table presented at the top of the page select (using checkboxes) first 11 fields:
4419e9980172 Uploaded
devteam
parents:
diff changeset
149 name
4419e9980172 Uploaded
devteam
parents:
diff changeset
150 chrom
4419e9980172 Uploaded
devteam
parents:
diff changeset
151 strand
4419e9980172 Uploaded
devteam
parents:
diff changeset
152 txStart
4419e9980172 Uploaded
devteam
parents:
diff changeset
153 txEnd
4419e9980172 Uploaded
devteam
parents:
diff changeset
154 cdsStart
4419e9980172 Uploaded
devteam
parents:
diff changeset
155 cdsEnd
4419e9980172 Uploaded
devteam
parents:
diff changeset
156 exonCount
4419e9980172 Uploaded
devteam
parents:
diff changeset
157 exonStarts
4419e9980172 Uploaded
devteam
parents:
diff changeset
158 exonEnds
4419e9980172 Uploaded
devteam
parents:
diff changeset
159 proteinId
4419e9980172 Uploaded
devteam
parents:
diff changeset
160 7. Click **done with selection**
4419e9980172 Uploaded
devteam
parents:
diff changeset
161 8. Click **Send query to Galaxy**
4419e9980172 Uploaded
devteam
parents:
diff changeset
162 9. A new dataset will appear in the current Galaxy history
4419e9980172 Uploaded
devteam
parents:
diff changeset
163 10. Use this dataset as the input for **Gene annotations in refFlat form** dropdown of this tool
4419e9980172 Uploaded
devteam
parents:
diff changeset
164
4419e9980172 Uploaded
devteam
parents:
diff changeset
165 .. _refFlat: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat
4419e9980172 Uploaded
devteam
parents:
diff changeset
166
4419e9980172 Uploaded
devteam
parents:
diff changeset
167 @description@
4419e9980172 Uploaded
devteam
parents:
diff changeset
168
4419e9980172 Uploaded
devteam
parents:
diff changeset
169 REF_FLAT=File Gene annotations in refFlat form. Format described here:
4419e9980172 Uploaded
devteam
parents:
diff changeset
170 http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat Required.
4419e9980172 Uploaded
devteam
parents:
diff changeset
171
4419e9980172 Uploaded
devteam
parents:
diff changeset
172 RIBOSOMAL_INTERVALS=File Location of rRNA sequences in genome, in interval_list format. If not specified no bases
4419e9980172 Uploaded
devteam
parents:
diff changeset
173 will be identified as being ribosomal. Format described here:
4419e9980172 Uploaded
devteam
parents:
diff changeset
174 http://picard.sourceforge.net/javadoc/net/sf/picard/util/IntervalList.html and can be
4419e9980172 Uploaded
devteam
parents:
diff changeset
175 generated from BED datasets using Galaxy's wrapper for picard_BedToIntervalList tool
4419e9980172 Uploaded
devteam
parents:
diff changeset
176
4419e9980172 Uploaded
devteam
parents:
diff changeset
177 STRAND_SPECIFICITY=StrandSpecificity
4419e9980172 Uploaded
devteam
parents:
diff changeset
178 STRAND=StrandSpecificity For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND
4419e9980172 Uploaded
devteam
parents:
diff changeset
179 if the reads are expected to be on the transcription strand. Required. Possible values:
4419e9980172 Uploaded
devteam
parents:
diff changeset
180 {NONE, FIRST_READ_TRANSCRIPTION_STRAND, SECOND_READ_TRANSCRIPTION_STRAND}
4419e9980172 Uploaded
devteam
parents:
diff changeset
181
4419e9980172 Uploaded
devteam
parents:
diff changeset
182 MINIMUM_LENGTH=Integer When calculating coverage based values (e.g. CV of coverage) only use transcripts of this
4419e9980172 Uploaded
devteam
parents:
diff changeset
183 length or greater. Default value: 500.
4419e9980172 Uploaded
devteam
parents:
diff changeset
184
4419e9980172 Uploaded
devteam
parents:
diff changeset
185 IGNORE_SEQUENCE=String If a read maps to a sequence specified with this option, all the bases in the read are
4419e9980172 Uploaded
devteam
parents:
diff changeset
186 counted as ignored bases.
4419e9980172 Uploaded
devteam
parents:
diff changeset
187
4419e9980172 Uploaded
devteam
parents:
diff changeset
188 RRNA_FRAGMENT_PERCENTAGE=Double
4419e9980172 Uploaded
devteam
parents:
diff changeset
189 This percentage of the length of a fragment must overlap one of the ribosomal intervals
4419e9980172 Uploaded
devteam
parents:
diff changeset
190 for a read or read pair to be considered rRNA. Default value: 0.8.
4419e9980172 Uploaded
devteam
parents:
diff changeset
191
4419e9980172 Uploaded
devteam
parents:
diff changeset
192 METRIC_ACCUMULATION_LEVEL=MetricAccumulationLevel
4419e9980172 Uploaded
devteam
parents:
diff changeset
193 LEVEL=MetricAccumulationLevel The level(s) at which to accumulate metrics. Possible values: {ALL_READS, SAMPLE,
4419e9980172 Uploaded
devteam
parents:
diff changeset
194 LIBRARY, READ_GROUP} This option may be specified 0 or more times.
4419e9980172 Uploaded
devteam
parents:
diff changeset
195
4419e9980172 Uploaded
devteam
parents:
diff changeset
196 ASSUME_SORTED=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
197 AS=Boolean If true (default), then the sort order in the header file will be ignored. Default
4419e9980172 Uploaded
devteam
parents:
diff changeset
198 value: true. Possible values: {true, false}
4419e9980172 Uploaded
devteam
parents:
diff changeset
199
4419e9980172 Uploaded
devteam
parents:
diff changeset
200 @more_info@
4419e9980172 Uploaded
devteam
parents:
diff changeset
201
4419e9980172 Uploaded
devteam
parents:
diff changeset
202 </help>
4419e9980172 Uploaded
devteam
parents:
diff changeset
203 </tool>