0
|
1 <tool name="SamToFastq" id="picard_SamToFastq" version="1.135">
|
|
<!-- Short summary of what the tool does. -->
<description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description>

<!--
    Shared definitions are imported from picard_macros.xml.
    NOTE(review): the "requirements" and "VS" macros and the @...@ tokens used
    in this file are presumably defined there; confirm against that file.
-->
<macros>
    <import>picard_macros.xml</import>
</macros>

<expand macro="requirements"/>
|
|
<!--
    Cheetah template that builds the Picard SamToFastq command line.

    The template is wrapped in a CDATA section so that shell operators such as
    "&&" and ">" can be written literally; a bare "&" in element content is not
    well-formed XML. The generated command line is unchanged.

    Output layout, selected from the output_per_rg and interleave parameters:
      * output_per_rg = true            : one fastq (or pair) per read group, written to "."
      * both false                      : READ1.fastq, READ2.fastq, UNPAIRED_READS.fastq
      * output_per_rg false, interleave : INTERLEAVED.fastq
    Every file ending in ".fastq" is picked up by the discover_datasets rule in
    the outputs section.
-->
<command><![CDATA[

    echo "BAM" > $report && ## This is necessary for output dataset detection (see output tags below)

    @java_options@

    java -jar \$JAVA_JAR_PATH/picard.jar
    SamToFastq

    INPUT="${inputFile}"

    ## Choose where the reads are written.
    #if str( $output_per_rg ) == "true":
        OUTPUT_PER_RG=true
        OUTPUT_DIR=.
    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
        FASTQ=READ1.fastq
        SECOND_END_FASTQ=READ2.fastq
        UNPAIRED_FASTQ=UNPAIRED_READS.fastq
    #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
        FASTQ=INTERLEAVED.fastq
    #end if

    RE_REVERSE="${re_reverse}"
    INTERLEAVE="${interleave}"
    INCLUDE_NON_PF_READS="${include_non_pf_reads}"
    CLIPPING_ATTRIBUTE="${clipping_attribute}"
    CLIPPING_ACTION="${clipping_action}"
    READ1_TRIM="${read1_trim}"

    ## A value of -1 means no limit, so the option is left off the command line.
    #if int($read1_max_bases_to_write) > -1:
        READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
    #end if

    READ2_TRIM="${read2_trim}"

    ## Same -1 convention as for read 1.
    #if int($read2_max_bases_to_write) > -1:
        READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
    #end if

    INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"

    VALIDATION_STRINGENCY="${validation_stringency}"
    QUIET=true
    VERBOSITY=ERROR

]]></command>
|
|
<inputs>

    <!-- Alignment dataset to convert. -->
    <param name="inputFile"
           type="data"
           format="sam,bam"
           label="Select SAM/BAM dataset or dataset collection"
           help="If empty, upload or import a SAM/BAM dataset"/>

    <!-- Output layout and read orientation. -->
    <param name="output_per_rg"
           type="boolean"
           checked="False"
           label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)"
           help="OUTPUT_PER_RG; default=False"/>
    <param name="re_reverse"
           type="boolean"
           checked="True"
           label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq"
           help="RE_REVERSE; default=True"/>
    <param name="interleave"
           type="boolean"
           label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from"
           help="INTERLEAVE; default=False"/>
    <param name="include_non_pf_reads"
           type="boolean"
           label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ"
           help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/>

    <!-- Clipping options; the literal text "null" is the default for both. -->
    <param name="clipping_attribute"
           type="text"
           size="4"
           value="null"
           label="The attribute that stores the position at which the SAM/BAM record should be clipped"
           help="CLIPPING_ATTRIBUTE; default=null"/>
    <param name="clipping_action"
           type="text"
           size="10"
           value="null"
           label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region"
           help="CLIPPING_ACTION; default=null"/>

    <!--
        Per-read trimming. For the *_max_bases_to_write parameters, -1 stands
        for "no limit": the command template only passes the option to Picard
        when the value is greater than -1.
    -->
    <param name="read1_trim"
           type="integer"
           value="0"
           min="0"
           label="The number of bases to trim from the beginning of read 1"
           help="READ1_TRIM; default=0"/>
    <param name="read1_max_bases_to_write"
           type="integer"
           value="-1"
           label="The maximum number of bases to write from read 1 after trimming"
           help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
    <param name="read2_trim"
           type="integer"
           value="0"
           min="0"
           label="The number of bases to trim from the beginning of read 2"
           help="READ2_TRIM; default=0"/>
    <param name="read2_max_bases_to_write"
           type="integer"
           value="-1"
           label="The maximum number of bases to write from read 2 after trimming"
           help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>

    <param name="include_non_primary_alignments"
           type="boolean"
           label="If true, include non-primary alignments in the output"
           help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/>

    <!-- NOTE(review): presumably supplies the validation_stringency parameter read by the command template; confirm in picard_macros.xml. -->
    <expand macro="VS"/>

</inputs>
|
|
72
|
|
<outputs>
    <!--
        The only declared output is the hidden "report" dataset, which the
        command fills with a marker line. The fastq files are attached to it
        through dataset discovery: every file matching "<designation>.fastq"
        becomes a visible fastqsanger dataset. This relies on the file names
        set in the command template and, when OUTPUT_PER_RG=true, on the tool
        automatically adding the .fastq extension to the files it emits.

        The angle brackets of the named group are written as &lt; and &gt;
        because a literal "<" is not allowed inside an XML attribute value;
        after parsing, the regular expression is unchanged.
    -->
    <data name="report" format="txt" label="SamToFastq run" hidden="true">
        <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/>
    </data>
</outputs>
|
|
79
|
|
<tests>
    <!-- Interleaved, single-output run: expects the marker line in the report and one discovered INTERLEAVED fastq. -->
    <test>
        <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/>

        <!-- Output layout. -->
        <param name="output_per_rg" value="false"/>
        <param name="re_reverse" value="true"/>
        <param name="interleave" value="true"/>
        <param name="include_non_pf_reads" value="false"/>

        <!-- Clipping left at its defaults. -->
        <param name="clipping_attribute" value="null"/>
        <param name="clipping_action" value="null"/>

        <!-- No trimming and no limit on bases written. -->
        <param name="read1_trim" value="0"/>
        <param name="read1_max_bases_to_write" value="-1"/>
        <param name="read2_trim" value="0"/>
        <param name="read2_max_bases_to_write" value="-1"/>

        <param name="include_non_primary_alignments" value="false"/>

        <output name="report">
            <assert_contents>
                <has_line line="BAM"/>
            </assert_contents>
            <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/>
        </output>
    </test>
</tests>
|
|
102
|
|
<!-- Exit codes in the range "1:" (1 and above) are reported at the fatal level. -->
<stdio>
    <exit_code level="fatal" range="1:"/>
</stdio>
|
|
106
|
|
107 <help>
|
|
108
|
|
109 **Purpose**
|
|
110
|
|
111 Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
|
|
112
|
|
113 -----
|
|
114
|
|
115 .. class:: warningmark
|
|
116
|
|
117 **DANGER: Multiple Outputs**
|
|
118
|
|
119 Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing!
|
|
120
|
|
121 @dataset_collections@
|
|
122
|
|
123 @description@
|
|
124
|
|
125 FASTQ=File
|
|
126 F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq).
|
|
127 Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
|
|
128
|
|
129 SECOND_END_FASTQ=File
|
|
130 F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null.
|
|
131 Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
|
|
132
|
|
133 UNPAIRED_FASTQ=File
|
|
134 FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode. Default
|
|
135 value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
|
|
136
|
|
137 OUTPUT_PER_RG=Boolean
|
|
138 OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is
|
|
139 paired). Default value: false. Possible values: {true, false} Cannot be used in
|
|
140 conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
|
|
141
|
|
142 OUTPUT_DIR=File
|
|
143 ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.
|
|
144 Default value: null.
|
|
145
|
|
146 RE_REVERSE=Boolean
|
|
147 RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them
|
|
148 to fastq. Default value: true. Possible values: {true, false}
|
|
149
|
|
150 INTERLEAVE=Boolean
|
|
151 INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe
|
|
152 which end it came from. Default value: false. Possible values: {true, false}
|
|
153
|
|
154 INCLUDE_NON_PF_READS=Boolean
|
|
155 NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes
|
|
156 filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.
|
|
157 Default value: false. Possible values: {true, false}
|
|
158
|
|
159 CLIPPING_ATTRIBUTE=String
|
|
160 CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped. Default
|
|
161 value: null.
|
|
162
|
|
163 CLIPPING_ACTION=String
|
|
164 CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities
|
|
165 should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in
|
|
166 the clipped region; and any integer means that the base qualities should be set to that
|
|
167 value in the clipped region. Default value: null.
|
|
168
|
|
169 READ1_TRIM=Integer
|
|
170 R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0.
|
|
171
|
|
172 READ1_MAX_BASES_TO_WRITE=Integer
|
|
173 R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than
|
|
174 this many bases left after trimming, all will be written. If this value is null then all
|
|
175 bases left after trimming will be written. Default value: null.
|
|
176
|
|
177 READ2_TRIM=Integer
|
|
178 R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0.
|
|
179
|
|
180 READ2_MAX_BASES_TO_WRITE=Integer
|
|
181 R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than
|
|
182 this many bases left after trimming, all will be written. If this value is null then all
|
|
183 bases left after trimming will be written. Default value: null.
|
|
184
|
|
185 INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
|
|
186 If true, include non-primary alignments in the output. Support of non-primary alignments
|
|
187 in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and
|
|
188 there are paired reads with non-primary alignments. Default value: false.
|
|
189 Possible values: {true, false}
|
|
190
|
|
191 @more_info@
|
|
192
|
|
193 </help>
|
|
194 </tool>
|
|
195
|
|
196
|