comparison picard_FastqToSam.xml @ 5:3d4f1fa26f0e draft

Uploaded
author devteam
date Tue, 16 Dec 2014 19:03:21 -0500
parents bf1c3f9f8282
children 3a3234d7a2e8
comparison
equal deleted inserted replaced
4:ab1f60c26526 5:3d4f1fa26f0e
1 <tool id="picard_FastqToSam" name="FASTQ to BAM" version="1.56.0"> 1 <tool name="FastqToSam" id="picard_FastqToSam" version="1.126.0">
2 <description>creates an unaligned BAM file</description> 2 <description>convert Fastq data into unaligned BAM</description>
3 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements> 3 <requirements>
4 <!-- Dan Blankenberg --> 4 <requirement type="package" version="1.126.0">picard</requirement>
5 <command>java -XX:DefaultMaxRAMFraction=1 -XX:+UseParallelGC 5 </requirements>
6 -jar "\$JAVA_JAR_PATH/FastqToSam.jar" 6
7 FASTQ="${input_fastq1}" 7 <macros>
8 #if str( $input_fastq2) != "None": 8 <import>picard_macros.xml</import>
9 FASTQ2="${input_fastq2}" 9 </macros>
10 #end if 10
11 QUALITY_FORMAT="${ dict( fastqsanger='Standard', fastqcssanger='Standard', fastqillumina='Illumina', fastqsolexa='Solexa' )[ $input_fastq1.ext ] }" ##Solexa, Illumina, Standard 11 <command>
12 OUTPUT="${output_bam}" 12 @java_options@
13
14 java -jar \$JAVA_JAR_PATH/picard.jar
15 FastqToSam
16
17 #if str( $input_type.input_type_selector ) == "se":
18 FASTQ="${input_type.fastq}"
19 #elif str( $input_type.input_type_selector ) == "pe":
20 FASTQ="${input_type.fastq}"
21 FASTQ2="${input_type.fastq2}"
22 #else
23 FASTQ="${input_type.fastq.forward}"
24 FASTQ2="${input_type.fastq.reverse}"
25 #end if
26
27 QUALITY_FORMAT="${quality_format}"
28 OUTPUT="${outFile}"
13 READ_GROUP_NAME="${read_group_name}" 29 READ_GROUP_NAME="${read_group_name}"
14 SAMPLE_NAME="${sample_name}" 30 SAMPLE_NAME="${sample_name}"
15 #if $param_type.param_type_selector == "advanced": 31
16 #if str( $param_type.library_name ) != "": 32 #if str( $library_name ):
17 LIBRARY_NAME="${param_type.library_name}" 33 LIBRARY_NAME="${library_name}"
18 #end if 34 #end if
19 #if str( $param_type.platform_unit ) != "": 35
20 PLATFORM_UNIT="${param_type.platform_unit}" 36 #if str( $platform_unit ):
21 #end if 37 PLATFORM_UNIT="${platform_unit}"
22 #if str( $param_type.platform ) != "": 38 #end if
23 PLATFORM="${param_type.platform}" 39
24 #end if 40 #if str( $platform ):
25 #if str( $param_type.sequencing_center ) != "": 41 PLATFORM="${platform}"
26 SEQUENCING_CENTER="${param_type.sequencing_center}" 42 #end if
27 #end if 43
28 #if str( $param_type.predicted_insert_size ) != "": 44 #if str( $sequencing_center ):
29 PREDICTED_INSERT_SIZE="${param_type.predicted_insert_size}" 45 SEQUENCING_CENTER="${sequencing_center}"
30 #end if 46 #end if
31 #if str( $param_type.description.value ) != "": 47
32 DESCRIPTION="${param_type.description}" 48 #if str( $predicted_insert_size ):
33 #end if 49 PREDICTED_INSERT_SIZE="${predicted_insert_size}"
34 #if str( $param_type.run_date ) != "": 50 #end if
35 RUN_DATE="${param_type.run_date}" 51
36 #end if 52 #if str( $comment ):
37 #if str( $param_type.min_q ) != "": 53 COMMENT="${comment}"
38 MIN_Q="${param_type.min_q}" 54 #end if
39 #end if 55
40 #if str( $param_type.max_q ) != "": 56 #if str( $description ):
41 MAX_Q="${param_type.max_q}" 57 DESCRIPTION="${description}"
42 #end if 58 #end if
43 SORT_ORDER="${param_type.sort_order}" 59
44 #else: 60 #if str( $run_date ):
45 SORT_ORDER=coordinate ##unsorted, queryname, coordinate; always use coordinate 61 RUN_DATE="${run_date}"
46 #end if 62 #end if
47 2&gt;&amp;1 63
48 || echo "Error running Picard FastqToSAM" >&amp;2 64 MIN_Q="${min_q}"
65 MAX_Q="${max_q}"
66 STRIP_UNPAIRED_MATE_NUMBER="${strip_unpairied_mate_number}"
67 ALLOW_AND_IGNORE_EMPTY_LINES="${allow_and_ignore_empty_lines}"
68
69 SORT_ORDER=coordinate
70 VALIDATION_STRINGENCY="${validation_stringency}"
71 QUIET=true
72 VERBOSITY=ERROR
73
49 </command> 74 </command>
50 <inputs> 75 <inputs>
51 <param name="input_fastq1" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" label="FASTQ file" /> <!-- confirm that fastqcssanger also works --> 76 <conditional name="input_type">
52 <param name="input_fastq2" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" optional="True" label="Second FASTQ of paired end data" help="Only needed when using paired end data." > 77 <param name="input_type_selector" type="select" label="What is your input data" help="Select between single end, paired end, and collections. See help below for full explanation of dataset types">
53 <options options_filter_attribute="ext" from_parameter="tool.app.datatypes_registry.datatypes_by_extension" transform_lines="obj.keys()"> 78 <option value="se">Single end (single dataset)</option>
54 <column name="name" index="0"/> 79 <option value="pe">Paired end (two datasets)</option>
55 <column name="value" index="0"/> 80 <option value="pc">Paired collection</option>
56 <filter type="param_value" ref="input_fastq1" ref_attribute="ext" column="0"/>
57 </options>
58 </param>
59 <param name="read_group_name" type="text" value="A" label="Read Group Name" />
60 <param name="sample_name" type="text" value="unknown sample" label="Sample Name" />
61 <conditional name="param_type">
62 <param name="param_type_selector" type="select" label="Basic or Advanced options">
63 <option value="basic" selected="True">Basic</option>
64 <option value="advanced">Advanced</option>
65 </param> 81 </param>
66 <when value="basic"> 82 <when value="se">
67 <!-- Do nothing here --> 83 <param name="fastq" type="data" format="fastq" label="Input fastq file for single end data" help="FASTQ"/>
68 </when> 84 </when>
69 <when value="advanced"> 85 <when value="pe">
70 <param name="library_name" type="text" value="" label="Library Name" /> 86 <param name="fastq" type="data" format="fastq" label="Input fastq file for the first read in paired end data" help="FASTQ"/>
71 <param name="platform_unit" type="text" value="" label="Platform Unit" /> 87 <param name="fastq2" type="data" format="fastq" label="Input fastq file for the second read of paired end data" help="FASTQ2"/>
72 <param name="platform" type="text" value="" label="Platform" /> 88 </when>
73 <param name="sequencing_center" type="text" value="" label="Sequencing Center" /> 89 <when value="pc">
74 <param name="predicted_insert_size" type="integer" value="" optional="True" label="Predicted Insert Size" /> 90 <param name="fastq" type="data_collection" collection_type="paired" label="FASTQ paired dataset collection" help="FASTQ and FASTQ2; A collection of two datasets with forward and reverse reads. See help below on explanation of dataset collections"/>
75 <param name="description" type="text" value="" label="Description" />
76 <param name="run_date" type="text" value="" label="Run Date" />
77 <param name="min_q" type="integer" optional="True" value="0" label="Min Q" />
78 <param name="max_q" type="integer" optional="True" value="93" label="Max Q" />
79 <param name="sort_order" type="select" label="Sort order">
80 <option value="coordinate" selected="True">coordinate</option>
81 <option value="queryname">queryname</option>
82 <option value="unsorted">unsorted</option>
83 </param>
84 </when> 91 </when>
85 </conditional> 92 </conditional>
86 </inputs> 93
94 <param name="quality_format" type="select" label="Select quality encoding scheme" help="QUALITY_FORMAT">
95 <option value="Standard" selected="True">Sanger (+33)</option>
96 <option value="Illumina">Illumina (+64)</option>
97 <option value="Solexa">Solexa (+66)</option>
98 </param>
99
100 <param name="read_group_name" type="text" size="20" value="A" label="Read group name" help="READ_GROUP_NAME"/>
101 <param name="sample_name" type="text" size="20" value="sample-a" label="Sample name" help="SAMPLE_NAME"/>
102 <param name="library_name" type="text" size="20" optional="True" label="The library name" help="LIBRARY_NAME; Optional"/>
103 <param name="platform_unit" type="text" size="20" optional="True" label="The platform unit (often run_barcode.lane)" help="PLATFORM_UNIT; Optional"/>
104 <param name="platform" type="text" size="20" optional="True" label="The platform type (e.g. illumina, 454)" help="PLATFORM; Optional"/>
105 <param name="sequencing_center" type="text" size="20" optional="True" label="The sequencing center from which the data originated" help="SEQUENCING_CENTER; Optional"/>
106
107 <param name="predicted_insert_size" type="integer" min="0" max="100000" optional="True" label="Predicted median insert size, to insert into the read group header" help="PREDICTED_INSERT_SIZE; Optional"/>
108 <param name="comment" type="text" size="20" optional="True" label="Comment to include in the output dataset's header" help="COMMENT; Optional"/>
109 <param name="description" type="text" size="20" optional="True" label="Optional description information" help="DESCRIPTION; Optional"/>
110 <param name="run_date" optional="True" type="text" label="Run date" help="RUN_DATE; Optional; Format=YYYY-MM-DD (e.g. 1997-07-16)"/>
111 <param name="min_q" type="integer" value="0" min="0" max="100" label="Minimum quality allowed in the input fastq" help="MIN_Q; An exception will be thrown if a quality is less than this value; default=0"/>
112 <param name="max_q" type="integer" value="93" min="0" max="100" label="Maximum quality allowed in the input fastq" help="MAX_Q; An exception will be thrown if a quality is greater than this value; default=93"/>
113 <param name="strip_unpairied_mate_number" type="boolean" truevalue="true" falsevalue="false" label="If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end of a read name" help="STRIP_UNPAIRED_MATE_NUMBER; default=false"/>
114 <param name="allow_and_ignore_empty_lines" type="boolean" truevalue="true" falsevalue="false" label="Allow (and ignore) empty lines" help="ALLOW_AND_IGNORE_EMPTY_LINES; default=false"/>
115
116 <expand macro="VS" />
117
118 </inputs>
119
87 <outputs> 120 <outputs>
88 <data format="bam" name="output_bam" /> 121 <data format="bam" name="outFile" label="${tool.name} on ${on_string}: reads as unaligned BAM"/>
89 </outputs> 122 </outputs>
123
90 <tests> 124 <tests>
91 <test> 125 <test>
92 <param name="input_fastq1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" /> 126 <param name="input_type_selector" value="pe" />
93 <param name="input_fastq2" /> 127 <param name="quality_format" value="Standard" />
94 <param name="read_group_name" value="A" /> 128 <param name="read_group_name" value="A" />
95 <param name="sample_name" value="unknown sample" /> 129 <param name="sample_name" value="sample-a" />
96 <param name="param_type_selector" value="basic" /> 130 <param name="library_name" value="A"/>
97 <output name="output_bam" file="picard_fastq_to_sam_out1.bam" ftype="bam"/> 131 <param name="platform_unit" value="A"/>
98 </test> 132 <param name="platform" value="Illumina"/>
99 <test> 133 <param name="sequencing_center" value="A"/>
100 <param name="input_fastq1" value="bwa_wrapper_in2.fastqsanger" ftype="fastqsanger" /> 134 <param name="predicted_insert_size" value="300"/>
101 <param name="input_fastq2" value="bwa_wrapper_in3.fastqsanger" ftype="fastqsanger" /> 135 <param name="comment" value="A"/>
102 <param name="read_group_name" value="A" /> 136 <param name="description" value="A"/>
103 <param name="sample_name" value="unknown sample" /> 137 <param name="run_date" value="2014-10-10"/>
104 <param name="param_type_selector" value="basic" /> 138 <param name="min_q" value="0" />
105 <output name="output_bam" file="picard_fastq_to_sam_out2.bam" ftype="bam"/> 139 <param name="max_q" value="93" />
106 </test> 140 <param name="strip_unpairied_mate_number" value="False" />
141 <param name="allow_and_ignore_empty_lines" value="False" />
142 <param name="validation_stringency" value="LENIENT"/>
143 <param name="fastq" value="picard_FastqToSam_read1.fq" ftype="fastq" />
144 <param name="fastq2" value="picard_FastqToSam_read2.fq" ftype="fastq" />
145 <output name="outFile" file="picard_FastqToSam_test1.bam" ftype="bam" lines_diff="4"/>
146 </test>
107 </tests> 147 </tests>
148
149 <stdio>
150 <exit_code range="1:" level="fatal"/>
151 </stdio>
152
108 <help> 153 <help>
109 **What it does** 154
110 155 .. class:: infomark
111 Picard: FastqToSam converts FASTQ files to unaligned BAM files. 156
112 157 **Purpose**
113 ------ 158
114 159 Converts FASTQ files (single-end or paired-end) into an unaligned BAM file.
115 Please cite the website "http://picard.sourceforge.net". 160
116 161 @dataset_collections@
117 ------ 162
118 163 @RG@
119 164
120 **Input formats** 165 @description@
121 166
122 FastqToSam accepts FASTQ input files. If using paired-end data, you should select two FASTQ files. 167 FASTQ=File
123 168 F1=File Input fastq file for single end data, or first read in paired end
124 ------ 169 data. Required.
125 170
126 **Outputs** 171 FASTQ2=File
127 172 F2=File Input fastq file for the second read of paired end data (if used).
128 The output is in BAM format, see http://samtools.sourceforge.net for more details. 173
129 174 QUALITY_FORMAT=FastqQualityFormat
130 ------- 175 V=FastqQualityFormat A value describing how the quality values are encoded in the fastq. Either Solexa for
131 176 pre-pipeline 1.3 style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above
132 **FastqToSam settings** 177 (phred scaling + 64) or Standard for phred scaled scores with a character shift of 33.
133 178 If this value is not specified, the quality format will be detected automatically.
134 This is list of FastqToSam options:: 179 Default value: null. Possible values: {Solexa, Illumina, Standard}
135 180
136 READ_GROUP_NAME=String Read group name Default value: A. This option can be set to 'null' to clear the default value. 181 READ_GROUP_NAME=String
137 SAMPLE_NAME=String Sample name to insert into the read group header Required. 182 RG=String Read group name Default value: A.
138 LIBRARY_NAME=String The library name to place into the LB attribute in the read group header Default value: null. 183
139 PLATFORM_UNIT=String The platform unit (often run_barcode.lane) to insert into the read group header Default value: null. 184 SAMPLE_NAME=String
140 PLATFORM=String The platform type (e.g. illumina, solid) to insert into the read group header Default value: null. 185 SM=String Sample name to insert into the read group header Required.
141 SEQUENCING_CENTER=String The sequencing center from which the data originated Default value: null. 186
142 PREDICTED_INSERT_SIZE=Integer Predicted median insert size, to insert into the read group header Default value: null. 187 LIBRARY_NAME=String
143 DESCRIPTION=String Inserted into the read group header Default value: null. 188 LB=String The library name to place into the LB attribute in the read group header.
189
190 PLATFORM_UNIT=String
191 PU=String The platform unit (often run_barcode.lane) to insert into the read group header.
192
193 PLATFORM=String
194 PL=String The platform type (e.g. illumina, solid) to insert into the read group header.
195
196 SEQUENCING_CENTER=String
197 CN=String The sequencing center from which the data originated.
198
199 PREDICTED_INSERT_SIZE=Integer
200 PI=Integer Predicted median insert size, to insert into the read group header.
201
202 COMMENT=String
203 CO=String Comment to include in the merged output file's header.
204
205 DESCRIPTION=String
206 DS=String Inserted into the read group header.
207
208 RUN_DATE=Iso8601Date
209 DT=Iso8601Date Date the run was produced, to insert into the read group header.
210
211 MIN_Q=Integer Minimum quality allowed in the input fastq. An exception will be thrown if a quality is
212 less than this value. Default value: 0.
213
214 MAX_Q=Integer Maximum quality allowed in the input fastq. An exception will be thrown if a quality is
215 greater than this value. Default value: 93.
216
217 STRIP_UNPAIRED_MATE_NUMBER=Boolean
218 If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end
219 of a read name. Default value: false. Possible values: {true, false}
220
221 ALLOW_AND_IGNORE_EMPTY_LINES=Boolean
222 Allow (and ignore) empty lines Default value: false. Possible values: {true, false}
223
224
225 @more_info@
226
144 </help> 227 </help>
145 </tool> 228 </tool>
229
230