annotate picard_SamToFastq.xml @ 0:4419e9980172 draft

Uploaded
author devteam
date Thu, 23 Oct 2014 12:03:34 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4419e9980172 Uploaded
devteam
parents:
diff changeset
1 <tool name="SamToFastq" id="picard_SamToFastq" version="1.122.0">
4419e9980172 Uploaded
devteam
parents:
diff changeset
2 <description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description>
4419e9980172 Uploaded
devteam
parents:
diff changeset
3 <requirements>
4419e9980172 Uploaded
devteam
parents:
diff changeset
4 <requirement type="package" version="1.122.0">picard</requirement>
4419e9980172 Uploaded
devteam
parents:
diff changeset
5 </requirements>
4419e9980172 Uploaded
devteam
parents:
diff changeset
6
4419e9980172 Uploaded
devteam
parents:
diff changeset
7 <macros>
4419e9980172 Uploaded
devteam
parents:
diff changeset
8 <import>picard_macros.xml</import>
4419e9980172 Uploaded
devteam
parents:
diff changeset
9 </macros>
4419e9980172 Uploaded
devteam
parents:
diff changeset
10
4419e9980172 Uploaded
devteam
parents:
diff changeset
11 <command>
4419e9980172 Uploaded
devteam
parents:
diff changeset
12
4419e9980172 Uploaded
devteam
parents:
diff changeset
13 echo "BAM" > $report &amp;&amp; ## This is necessary for output dataset detection (see output tags below)
4419e9980172 Uploaded
devteam
parents:
diff changeset
14
4419e9980172 Uploaded
devteam
parents:
diff changeset
15 @java_options@
4419e9980172 Uploaded
devteam
parents:
diff changeset
16
4419e9980172 Uploaded
devteam
parents:
diff changeset
17 java -jar \$JAVA_JAR_PATH/SamToFastq.jar
4419e9980172 Uploaded
devteam
parents:
diff changeset
18
4419e9980172 Uploaded
devteam
parents:
diff changeset
19 INPUT="${inputFile}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
20
4419e9980172 Uploaded
devteam
parents:
diff changeset
21 #if str( $output_per_rg ) == "true":
4419e9980172 Uploaded
devteam
parents:
diff changeset
22 OUTPUT_PER_RG=true
4419e9980172 Uploaded
devteam
parents:
diff changeset
23 OUTPUT_DIR=.
4419e9980172 Uploaded
devteam
parents:
diff changeset
24 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
4419e9980172 Uploaded
devteam
parents:
diff changeset
25 FASTQ=READ1.fastq
4419e9980172 Uploaded
devteam
parents:
diff changeset
26 SECOND_END_FASTQ=READ2.fastq
4419e9980172 Uploaded
devteam
parents:
diff changeset
27 UNPAIRED_FASTQ=UNPAIRED_READS.fastq
4419e9980172 Uploaded
devteam
parents:
diff changeset
28 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
4419e9980172 Uploaded
devteam
parents:
diff changeset
29 FASTQ=INTERLEAVED.fastq
4419e9980172 Uploaded
devteam
parents:
diff changeset
30 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
31
4419e9980172 Uploaded
devteam
parents:
diff changeset
32 RE_REVERSE="${re_reverse}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
33 INTERLEAVE="${interleave}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
34 INCLUDE_NON_PF_READS="${include_non_pf_reads}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
35 CLIPPING_ATTRIBUTE="${clipping_attribute}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
36 CLIPPING_ACTION="${clipping_action}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
37 READ1_TRIM="${read1_trim}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
38
4419e9980172 Uploaded
devteam
parents:
diff changeset
39 #if int($read1_max_bases_to_write) > -1:
4419e9980172 Uploaded
devteam
parents:
diff changeset
40 READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
41 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
42
4419e9980172 Uploaded
devteam
parents:
diff changeset
43 READ2_TRIM="${read2_trim}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
44
4419e9980172 Uploaded
devteam
parents:
diff changeset
45 #if int($read2_max_bases_to_write) > -1:
4419e9980172 Uploaded
devteam
parents:
diff changeset
46 READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
47 #end if
4419e9980172 Uploaded
devteam
parents:
diff changeset
48
4419e9980172 Uploaded
devteam
parents:
diff changeset
49 INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
50
4419e9980172 Uploaded
devteam
parents:
diff changeset
51
4419e9980172 Uploaded
devteam
parents:
diff changeset
52 VALIDATION_STRINGENCY="${validation_stringency}"
4419e9980172 Uploaded
devteam
parents:
diff changeset
53 QUIET=true
4419e9980172 Uploaded
devteam
parents:
diff changeset
54 VERBOSITY=ERROR
4419e9980172 Uploaded
devteam
parents:
diff changeset
55
4419e9980172 Uploaded
devteam
parents:
diff changeset
56 </command>
4419e9980172 Uploaded
devteam
parents:
diff changeset
57 <inputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
58
4419e9980172 Uploaded
devteam
parents:
diff changeset
59 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
60 <param name="output_per_rg" type="boolean" checked="False" label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)" help="OUTPUT_PER_RG; default=False"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
61 <param name="re_reverse" type="boolean" checked="True" label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq" help="RE_REVERSE; default=True"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
62 <param name="interleave" type="boolean" label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from" help="INTERLEAVE; default=False"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
63 <param name="include_non_pf_reads" type="boolean" label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ" help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
64 <param name="clipping_attribute" type="text" size="4" value="null" label="The attribute that stores the position at which the SAM/BAM record should be clipped" help="CLIPPING_ATTRIBUTE; default=null"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
65 <param name="clipping_action" type="text" size="10" value="null" label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region" help="CLIPPING_ACTION; default=null"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
66 <param name="read1_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 1" help="READ1_TRIM; default=0"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
67 <param name="read1_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 1 after trimming" help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
68 <param name="read2_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 2" help="READ2_TRIM; default=0"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
69 <param name="read2_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 2 after trimming" help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
70 <param name="include_non_primary_alignments" type="boolean" label="If true, include non-primary alignments in the output" help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
71
4419e9980172 Uploaded
devteam
parents:
diff changeset
72 <expand macro="VS" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
73
4419e9980172 Uploaded
devteam
parents:
diff changeset
74 </inputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
75
4419e9980172 Uploaded
devteam
parents:
diff changeset
76 <outputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
77 <!-- here dataset discovery is based on fact that if OUTPUT_PER_RG=true this tool automatically adds .fastq extension to emitted files -->
4419e9980172 Uploaded
devteam
parents:
diff changeset
78 <data format="txt" name="report" label="SamToFastq run" hidden="true">
4419e9980172 Uploaded
devteam
parents:
diff changeset
79 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
80 </data>
4419e9980172 Uploaded
devteam
parents:
diff changeset
81 </outputs>
4419e9980172 Uploaded
devteam
parents:
diff changeset
82
4419e9980172 Uploaded
devteam
parents:
diff changeset
83 <tests>
4419e9980172 Uploaded
devteam
parents:
diff changeset
84 <test>
4419e9980172 Uploaded
devteam
parents:
diff changeset
85 <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
86 <param name="output_per_rg" value="false"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
87 <param name="re_reverse" value="true"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
88 <param name="interleave" value="true"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
89 <param name="include_non_pf_reads" value="false"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
90 <param name="clipping_attribute" value="null" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
91 <param name="clipping_action" value="null" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
92 <param name="read1_trim" value="0" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
93 <param name="read1_max_bases_to_write" value="-1"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
94 <param name="read2_trim" value="0" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
95 <param name="read2_max_bases_to_write" value="-1"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
96 <param name="include_non_primary_alignments" value="false"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
97 <output name="report">
4419e9980172 Uploaded
devteam
parents:
diff changeset
98 <assert_contents>
4419e9980172 Uploaded
devteam
parents:
diff changeset
99 <has_line line="BAM" />
4419e9980172 Uploaded
devteam
parents:
diff changeset
100 </assert_contents>
4419e9980172 Uploaded
devteam
parents:
diff changeset
101 <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
102 </output>
4419e9980172 Uploaded
devteam
parents:
diff changeset
103 </test>
4419e9980172 Uploaded
devteam
parents:
diff changeset
104 </tests>
4419e9980172 Uploaded
devteam
parents:
diff changeset
105
4419e9980172 Uploaded
devteam
parents:
diff changeset
106 <stdio>
4419e9980172 Uploaded
devteam
parents:
diff changeset
107 <exit_code range="1:" level="fatal"/>
4419e9980172 Uploaded
devteam
parents:
diff changeset
108 </stdio>
4419e9980172 Uploaded
devteam
parents:
diff changeset
109
4419e9980172 Uploaded
devteam
parents:
diff changeset
110 <help>
4419e9980172 Uploaded
devteam
parents:
diff changeset
111
4419e9980172 Uploaded
devteam
parents:
diff changeset
112 **Purpose**
4419e9980172 Uploaded
devteam
parents:
diff changeset
113
4419e9980172 Uploaded
devteam
parents:
diff changeset
114 Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
4419e9980172 Uploaded
devteam
parents:
diff changeset
115
4419e9980172 Uploaded
devteam
parents:
diff changeset
116 -----
4419e9980172 Uploaded
devteam
parents:
diff changeset
117
4419e9980172 Uploaded
devteam
parents:
diff changeset
118 .. class:: warningmark
4419e9980172 Uploaded
devteam
parents:
diff changeset
119
4419e9980172 Uploaded
devteam
parents:
diff changeset
120 **DANGER: Multiple Outputs**
4419e9980172 Uploaded
devteam
parents:
diff changeset
121
4419e9980172 Uploaded
devteam
parents:
diff changeset
122 Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing!
4419e9980172 Uploaded
devteam
parents:
diff changeset
123
4419e9980172 Uploaded
devteam
parents:
diff changeset
124 @dataset_collections@
4419e9980172 Uploaded
devteam
parents:
diff changeset
125
4419e9980172 Uploaded
devteam
parents:
diff changeset
126 @description@
4419e9980172 Uploaded
devteam
parents:
diff changeset
127
4419e9980172 Uploaded
devteam
parents:
diff changeset
128 FASTQ=File
4419e9980172 Uploaded
devteam
parents:
diff changeset
129 F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq).
4419e9980172 Uploaded
devteam
parents:
diff changeset
130 Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
4419e9980172 Uploaded
devteam
parents:
diff changeset
131
4419e9980172 Uploaded
devteam
parents:
diff changeset
132 SECOND_END_FASTQ=File
4419e9980172 Uploaded
devteam
parents:
diff changeset
133 F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null.
4419e9980172 Uploaded
devteam
parents:
diff changeset
134 Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
4419e9980172 Uploaded
devteam
parents:
diff changeset
135
4419e9980172 Uploaded
devteam
parents:
diff changeset
136 UNPAIRED_FASTQ=File
4419e9980172 Uploaded
devteam
parents:
diff changeset
137 FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode. Default
4419e9980172 Uploaded
devteam
parents:
diff changeset
138 value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
4419e9980172 Uploaded
devteam
parents:
diff changeset
139
4419e9980172 Uploaded
devteam
parents:
diff changeset
140 OUTPUT_PER_RG=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
141 OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is
4419e9980172 Uploaded
devteam
parents:
diff changeset
142 paired). Default value: false. Possible values: {true, false} Cannot be used in
4419e9980172 Uploaded
devteam
parents:
diff changeset
143 conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
4419e9980172 Uploaded
devteam
parents:
diff changeset
144
4419e9980172 Uploaded
devteam
parents:
diff changeset
145 OUTPUT_DIR=File
4419e9980172 Uploaded
devteam
parents:
diff changeset
146 ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.
4419e9980172 Uploaded
devteam
parents:
diff changeset
147 Default value: null.
4419e9980172 Uploaded
devteam
parents:
diff changeset
148
4419e9980172 Uploaded
devteam
parents:
diff changeset
149 RE_REVERSE=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
150 RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them
4419e9980172 Uploaded
devteam
parents:
diff changeset
151 to fastq. Default value: true. Possible values: {true, false}
4419e9980172 Uploaded
devteam
parents:
diff changeset
152
4419e9980172 Uploaded
devteam
parents:
diff changeset
153 INTERLEAVE=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
154 INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe
4419e9980172 Uploaded
devteam
parents:
diff changeset
155 which end it came from. Default value: false. Possible values: {true, false}
4419e9980172 Uploaded
devteam
parents:
diff changeset
156
4419e9980172 Uploaded
devteam
parents:
diff changeset
157 INCLUDE_NON_PF_READS=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
158 NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes
4419e9980172 Uploaded
devteam
parents:
diff changeset
159 filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.
4419e9980172 Uploaded
devteam
parents:
diff changeset
160 Default value: false. Possible values: {true, false}
4419e9980172 Uploaded
devteam
parents:
diff changeset
161
4419e9980172 Uploaded
devteam
parents:
diff changeset
162 CLIPPING_ATTRIBUTE=String
4419e9980172 Uploaded
devteam
parents:
diff changeset
163 CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped. Default
4419e9980172 Uploaded
devteam
parents:
diff changeset
164 value: null.
4419e9980172 Uploaded
devteam
parents:
diff changeset
165
4419e9980172 Uploaded
devteam
parents:
diff changeset
166 CLIPPING_ACTION=String
4419e9980172 Uploaded
devteam
parents:
diff changeset
167 CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities
4419e9980172 Uploaded
devteam
parents:
diff changeset
168 should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in
4419e9980172 Uploaded
devteam
parents:
diff changeset
169 the clipped region; and any integer means that the base qualities should be set to that
4419e9980172 Uploaded
devteam
parents:
diff changeset
170 value in the clipped region. Default value: null.
4419e9980172 Uploaded
devteam
parents:
diff changeset
171
4419e9980172 Uploaded
devteam
parents:
diff changeset
172 READ1_TRIM=Integer
4419e9980172 Uploaded
devteam
parents:
diff changeset
173 R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0.
4419e9980172 Uploaded
devteam
parents:
diff changeset
174
4419e9980172 Uploaded
devteam
parents:
diff changeset
175 READ1_MAX_BASES_TO_WRITE=Integer
4419e9980172 Uploaded
devteam
parents:
diff changeset
176 R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than
4419e9980172 Uploaded
devteam
parents:
diff changeset
177 this many bases left after trimming, all will be written. If this value is null then all
4419e9980172 Uploaded
devteam
parents:
diff changeset
178 bases left after trimming will be written. Default value: null.
4419e9980172 Uploaded
devteam
parents:
diff changeset
179
4419e9980172 Uploaded
devteam
parents:
diff changeset
180 READ2_TRIM=Integer
4419e9980172 Uploaded
devteam
parents:
diff changeset
181 R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0.
4419e9980172 Uploaded
devteam
parents:
diff changeset
182
4419e9980172 Uploaded
devteam
parents:
diff changeset
183 READ2_MAX_BASES_TO_WRITE=Integer
4419e9980172 Uploaded
devteam
parents:
diff changeset
184 R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than
4419e9980172 Uploaded
devteam
parents:
diff changeset
185 this many bases left after trimming, all will be written. If this value is null then all
4419e9980172 Uploaded
devteam
parents:
diff changeset
186 bases left after trimming will be written. Default value: null.
4419e9980172 Uploaded
devteam
parents:
diff changeset
187
4419e9980172 Uploaded
devteam
parents:
diff changeset
188 INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
4419e9980172 Uploaded
devteam
parents:
diff changeset
189 If true, include non-primary alignments in the output. Support of non-primary alignments
4419e9980172 Uploaded
devteam
parents:
diff changeset
190 in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and
4419e9980172 Uploaded
devteam
parents:
diff changeset
191 there are paired reads with non-primary alignments. Default value: false.
4419e9980172 Uploaded
devteam
parents:
diff changeset
192 Possible values: {true, false}
4419e9980172 Uploaded
devteam
parents:
diff changeset
193
4419e9980172 Uploaded
devteam
parents:
diff changeset
194 @more_info@
4419e9980172 Uploaded
devteam
parents:
diff changeset
195
4419e9980172 Uploaded
devteam
parents:
diff changeset
196 </help>
4419e9980172 Uploaded
devteam
parents:
diff changeset
197 </tool>
4419e9980172 Uploaded
devteam
parents:
diff changeset
198
4419e9980172 Uploaded
devteam
parents:
diff changeset
199