comparison picard_SamToFastq.xml @ 0:b76a4f17bbbb draft

Uploaded
author devteam
date Thu, 23 Oct 2014 11:31:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b76a4f17bbbb
1 <tool name="SamToFastq" id="picard_SamToFastq" version="1.122.0">
2 <description>extract reads and qualities from SAM/BAM dataset and convert to fastq</description>
3 <requirements>
4 <requirement type="package" version="1.122.0">picard</requirement>
5 </requirements>
6
7 <macros>
8 <import>picard_macros.xml</import>
9 </macros>
10
<!--
  Cheetah command template for the job.
  1. Writes the literal text BAM into the "report" output so that this dataset
     has content; the discovered fastq outputs are attached to it.
  2. Expands the java_options token (presumably defined in picard_macros.xml;
     verify there) and runs SamToFastq.jar found under $JAVA_JAR_PATH.
  3. Chooses the output arguments from output_per_rg and interleave:
       output_per_rg true                   : OUTPUT_PER_RG=true, files go to "."
       both false                           : READ1.fastq, READ2.fastq, UNPAIRED_READS.fastq
       output_per_rg false, interleave true : INTERLEAVED.fastq
  4. READ1_MAX_BASES_TO_WRITE and READ2_MAX_BASES_TO_WRITE are passed only when
     the corresponding parameter is greater than -1.
  QUIET=true and VERBOSITY=ERROR are hard-coded and not exposed as parameters.
-->
11 <command>
12
13 echo "BAM" > $report &amp;&amp; ## This is necessary for output dataset detection (see output tags below)
14
15 @java_options@
16
17 java -jar \$JAVA_JAR_PATH/SamToFastq.jar
18
19 INPUT="${inputFile}"
20
21 #if str( $output_per_rg ) == "true":
22 OUTPUT_PER_RG=true
23 OUTPUT_DIR=.
24 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "false":
25 FASTQ=READ1.fastq
26 SECOND_END_FASTQ=READ2.fastq
27 UNPAIRED_FASTQ=UNPAIRED_READS.fastq
28 #elif str( $output_per_rg ) == "false" and str( $interleave ) == "true":
29 FASTQ=INTERLEAVED.fastq
30 #end if
31
32 RE_REVERSE="${re_reverse}"
33 INTERLEAVE="${interleave}"
34 INCLUDE_NON_PF_READS="${include_non_pf_reads}"
35 CLIPPING_ATTRIBUTE="${clipping_attribute}"
36 CLIPPING_ACTION="${clipping_action}"
37 READ1_TRIM="${read1_trim}"
38
39 #if int($read1_max_bases_to_write) > -1:
40 READ1_MAX_BASES_TO_WRITE="${read1_max_bases_to_write}"
41 #end if
42
43 READ2_TRIM="${read2_trim}"
44
45 #if int($read2_max_bases_to_write) > -1:
46 READ2_MAX_BASES_TO_WRITE="${read2_max_bases_to_write}"
47 #end if
48
49 INCLUDE_NON_PRIMARY_ALIGNMENTS="${include_non_primary_alignments}"
50
51
52 VALIDATION_STRINGENCY="${validation_stringency}"
53 QUIET=true
54 VERBOSITY=ERROR
55
56 </command>
<!--
  User-facing parameters. Each param name is the variable referenced in the
  command template, and each help string starts with the Picard option it is
  passed to, followed by that option's default.
  Notes:
    * clipping_attribute and clipping_action default to the literal text "null",
      which the command template passes through unchanged.
    * read1_max_bases_to_write and read2_max_bases_to_write use -1 as the
      "not set" value; the command template omits the option unless the value
      is greater than -1.
    * The VS macro is expanded from the imported macros file; it presumably
      defines the validation_stringency parameter used by the command template
      (confirm in picard_macros.xml).
-->
57 <inputs>
58
59 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
60 <param name="output_per_rg" type="boolean" checked="False" label="Do you want to output a fastq file per read group (two fastq files per read group if the group is paired)" help="OUTPUT_PER_RG; default=False"/>
61 <param name="re_reverse" type="boolean" checked="True" label="Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq" help="RE_REVERSE; default=True"/>
62 <param name="interleave" type="boolean" label="Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe which end it came from" help="INTERLEAVE; default=False"/>
63 <param name="include_non_pf_reads" type="boolean" label="Include non-PF reads from the SAM/BAM dataset into the output FASTQ" help="INCLUDE_NON_PF_READS; PF means 'passes filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads; default=False"/>
64 <param name="clipping_attribute" type="text" size="4" value="null" label="The attribute that stores the position at which the SAM/BAM record should be clipped" help="CLIPPING_ATTRIBUTE; default=null"/>
65 <param name="clipping_action" type="text" size="10" value="null" label="The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region" help="CLIPPING_ACTION; default=null"/>
66 <param name="read1_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 1" help="READ1_TRIM; default=0"/>
67 <param name="read1_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 1 after trimming" help="READ1_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
68 <param name="read2_trim" type="integer" value="0" min="0" label="The number of bases to trim from the beginning of read 2" help="READ2_TRIM; default=0"/>
69 <param name="read2_max_bases_to_write" type="integer" value="-1" label="The maximum number of bases to write from read 2 after trimming" help="READ2_MAX_BASES_TO_WRITE; If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written; default=null (-1)"/>
70 <param name="include_non_primary_alignments" type="boolean" label="If true, include non-primary alignments in the output" help="INCLUDE_NON_PRIMARY_ALIGNMENTS; Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments; default=False"/>
71
72 <expand macro="VS" />
73
74 </inputs>
75
<!--
  Output declaration. The only declared dataset is the hidden txt file "report",
  which the command template fills with the text BAM. The fastq files are not
  declared individually: any produced file whose name matches NAME.fastq is
  picked up by the discover_datasets rule, with NAME captured as the designation,
  and added as a visible fastqsanger dataset.
  NOTE(review): discovery presumably scans the job working directory, which is
  where the command template writes its fastq files; confirm against the Galaxy
  version in use.
-->
76 <outputs>
77 <!-- here dataset discovery is based on fact that if OUTPUT_PER_RG=true this tool automatically adds .fastq extension to emitted files -->
78 <data format="txt" name="report" label="SamToFastq run" hidden="true">
79 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fastq" ext="fastqsanger" visible="true"/>
80 </data>
81 </outputs>
82
<!--
  Functional test covering the interleaved branch of the command template
  (output_per_rg=false, interleave=true) with all other parameters at the
  values declared as defaults in the inputs section.
  Expectations: the "report" output contains the line BAM, and a discovered
  dataset with designation INTERLEAVED matches picard_SamToFastq_test1.fq.
  The per-read-group and separate READ1/READ2 branches are not exercised here.
-->
83 <tests>
84 <test>
85 <param name="inputFile" value="picard_SamToFastq.bam" ftype="bam"/>
86 <param name="output_per_rg" value="false"/>
87 <param name="re_reverse" value="true"/>
88 <param name="interleave" value="true"/>
89 <param name="include_non_pf_reads" value="false"/>
90 <param name="clipping_attribute" value="null" />
91 <param name="clipping_action" value="null" />
92 <param name="read1_trim" value="0" />
93 <param name="read1_max_bases_to_write" value="-1"/>
94 <param name="read2_trim" value="0" />
95 <param name="read2_max_bases_to_write" value="-1"/>
96 <param name="include_non_primary_alignments" value="false"/>
97 <output name="report">
98 <assert_contents>
99 <has_line line="BAM" />
100 </assert_contents>
101 <discovered_dataset designation="INTERLEAVED" file="picard_SamToFastq_test1.fq" ftype="fastqsanger"/>
102 </output>
103 </test>
104 </tests>
105
<!-- Error detection: an exit code in the open-ended range starting at 1 is reported at the "fatal" level. -->
106 <stdio>
107 <exit_code range="1:" level="fatal"/>
108 </stdio>
109
110 <help>
111
112 **Purpose**
113
114 Extracts read sequences and qualities from the input SAM/BAM dataset and outputs them in Sanger fastq format. In the RE_REVERSE=True mode (default behavior), if the read is aligned and the alignment is to the reverse strand on the genome, the read's sequence from the input SAM/BAM dataset will be reverse-complemented prior to writing it to fastq in order to correctly restore the original read sequence as it was generated by the sequencer.
115
116 -----
117
118 .. class:: warningmark
119
120 **DANGER: Multiple Outputs**
121
122 Generating per readgroup fastq (setting **OUTPUT_PER_RG** to True) may produce very large numbers of outputs. Know what you are doing!
123
124 @dataset_collections@
125
126 @description@
127
128 FASTQ=File
129 F=File Output fastq file (single-end fastq or, if paired, first end of the pair fastq).
130 Required. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
131
132 SECOND_END_FASTQ=File
133 F2=File Output fastq file (if paired, second end of the pair fastq). Default value: null.
134 Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
135
136 UNPAIRED_FASTQ=File
137 FU=File Output fastq file for unpaired reads; may only be provided in paired-fastq mode. Default
138 value: null. Cannot be used in conjunction with option(s) OUTPUT_PER_RG (OPRG)
139
140 OUTPUT_PER_RG=Boolean
141 OPRG=Boolean Output a fastq file per read group (two fastq files per read group if the group is
142 paired). Default value: false. Possible values: {true, false}. Cannot be used in
143 conjunction with option(s) SECOND_END_FASTQ (F2) UNPAIRED_FASTQ (FU) FASTQ (F)
144
145 OUTPUT_DIR=File
146 ODIR=File Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.
147 Default value: null.
148
149 RE_REVERSE=Boolean
150 RC=Boolean Re-reverse bases and qualities of reads with negative strand flag set before writing them
151 to fastq. Default value: true. Possible values: {true, false}
152
153 INTERLEAVE=Boolean
154 INTER=Boolean Will generate an interleaved fastq if paired, each line will have /1 or /2 to describe
155 which end it came from. Default value: false. Possible values: {true, false}
156
157 INCLUDE_NON_PF_READS=Boolean
158 NON_PF=Boolean Include non-PF reads from the SAM file into the output FASTQ files. PF means 'passes
159 filtering'. Reads whose 'not passing quality controls' flag is set are non-PF reads.
160 Default value: false. Possible values: {true, false}
161
162 CLIPPING_ATTRIBUTE=String
163 CLIP_ATTR=String The attribute that stores the position at which the SAM record should be clipped. Default
164 value: null.
165
166 CLIPPING_ACTION=String
167 CLIP_ACT=String The action that should be taken with clipped reads: 'X' means the reads and qualities
168 should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in
169 the clipped region; and any integer means that the base qualities should be set to that
170 value in the clipped region. Default value: null.
171
172 READ1_TRIM=Integer
173 R1_TRIM=Integer The number of bases to trim from the beginning of read 1. Default value: 0.
174
175 READ1_MAX_BASES_TO_WRITE=Integer
176 R1_MAX_BASES=Integer The maximum number of bases to write from read 1 after trimming. If there are fewer than
177 this many bases left after trimming, all will be written. If this value is null then all
178 bases left after trimming will be written. Default value: null.
179
180 READ2_TRIM=Integer
181 R2_TRIM=Integer The number of bases to trim from the beginning of read 2. Default value: 0.
182
183 READ2_MAX_BASES_TO_WRITE=Integer
184 R2_MAX_BASES=Integer The maximum number of bases to write from read 2 after trimming. If there are fewer than
185 this many bases left after trimming, all will be written. If this value is null then all
186 bases left after trimming will be written. Default value: null.
187
188 INCLUDE_NON_PRIMARY_ALIGNMENTS=Boolean
189 If true, include non-primary alignments in the output. Support of non-primary alignments
190 in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and
191 there are paired reads with non-primary alignments. Default value: false.
192 Possible values: {true, false}
193
194 @more_info@
195
196 </help>
197 </tool>
198
199