comparison pal_finder_wrapper.xml @ 2:b6ccc7dd7b02 draft

Version 0.02.04.3.
author pjbriggs
date Fri, 04 Dec 2015 07:43:30 -0500
parents 771ebe02636f
children e1a14ed7a9d6
comparison
equal deleted inserted replaced
1:771ebe02636f 2:b6ccc7dd7b02
1 <tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.2"> 1 <tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.3">
2 <description>Find microsatellite repeat elements sequencing reads and design PCR primers to amplify them</description> 2 <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description>
3 <requirements>
4 <requirement type="package" version="5.16.3">perl</requirement>
5 <requirement type="package" version="0.02.04">pal_finder</requirement>
6 <requirement type="package" version="2.0.0">primer3_core</requirement>
7 <requirement type="package" version="1.65">biopython</requirement>
8 <requirement type="package" version="2.8.1">pandaseq</requirement>
9 </requirements>
3 <command interpreter="bash">pal_finder_wrapper.sh 10 <command interpreter="bash">pal_finder_wrapper.sh
4 #if str( $platform.platform_type ) == "illumina" 11 #if str( $platform.platform_type ) == "illumina"
5 $platform.input_fastq_r1 $platform.input_fastq_r2 12 #set $paired_input_type = $platform.paired_input_type_conditional.paired_input_type
13 #if $paired_input_type == "pair_of_files"
14 "$platform.paired_input_type_conditional.input_fastq_r1"
15 "$platform.paired_input_type_conditional.input_fastq_r2"
16 #else
17 "$platform.paired_input_type_conditional.input_fastq_pair.forward"
18 "$platform.paired_input_type_conditional.input_fastq_pair.reverse"
19 #end if
6 #else 20 #else
7 --454 $platform.input_fasta 21 --454 "$platform.input_fasta"
8 #end if 22 #end if
9 $output_microsat_summary $output_pal_summary 23 $output_microsat_summary $output_pal_summary
10 #if str( $platform.platform_type ) == "illumina" and $platform.filter_microsats
11 --filter_microsats $output_filtered_microsats
12 #end if
13 #if $keep_config_file 24 #if $keep_config_file
14 --output_config_file $output_config_file 25 --output_config_file "$output_config_file"
15 #end if 26 #end if
16 --primer-prefix "$primer_prefix" 27 --primer-prefix "$primer_prefix"
17 --2merMinReps $min_2mer_repeats 28 --2merMinReps $min_2mer_repeats
18 --3merMinReps $min_3mer_repeats 29 --3merMinReps $min_3mer_repeats
19 --4merMinReps $min_4mer_repeats 30 --4merMinReps $min_4mer_repeats
33 --primer-pair-max-diff-tm $primer.primer_pair_max_diff_tm 44 --primer-pair-max-diff-tm $primer.primer_pair_max_diff_tm
34 #end if 45 #end if
35 #if str( $mispriming.mispriming_options ) == "custom" 46 #if str( $mispriming.mispriming_options ) == "custom"
36 --primer-mispriming-library $mispriming.mispriming_library 47 --primer-mispriming-library $mispriming.mispriming_library
37 #end if 48 #end if
49 #if str( $platform.platform_type ) == "illumina"
50 #if $platform.filters
51 #for $filter in str($platform.filters).split(',')
52 $filter
53 --filter_microsats "$output_filtered_microsats"
54 #end for
55 #end if
56 #if str( $platform.assembly ) == '-assembly'
57 $platform.assembly "$output_assembly"
58 #end if
59 #end if
38 </command> 60 </command>
39 <requirements>
40 <requirement type="package" version="5.16.3">perl</requirement>
41 <requirement type="package" version="0.02.04">pal_finder</requirement>
42 <requirement type="package" version="2.0.0">primer3_core</requirement>
43 </requirements>
44 <inputs> 61 <inputs>
45 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" /> 62 <param name="primer_prefix" type="text" value="test" size="25" label="Primer prefix" help="This prefix will be added to the beginning of all primer names" />
46 <conditional name="platform"> 63 <conditional name="platform">
47 <param name="platform_type" type="select" label="Sequencing platform used to generate data" help="Currently pal_finder only handles Illumina paired-end reads and 454 single-end reads" > 64 <param name="platform_type" type="select" label="Sequencing platform used to generate data" help="Currently pal_finder only handles Illumina paired-end reads and 454 single-end reads" >
48 <option value="illumina" selected="true">Illumina</option> 65 <option value="illumina" selected="true">Illumina</option>
49 <option value="454">454</option> 66 <option value="454">454</option>
50 </param> 67 </param>
51 <when value="illumina"> 68 <when value="illumina">
52 <param name="input_fastq_r1" type="data" format="fastqsanger" label="Illumina fastq file (read 1)" /> 69 <conditional name="paired_input_type_conditional">
53 <param name="input_fastq_r2" type="data" format="fastqsanger" label="Illumina fastq file (read 2)" /> 70 <param name="paired_input_type" type="select" label="Input Type">
54 <param name="filter_microsats" type="boolean" truevalue="True" falsevalue="False" 71 <option value="pair_of_files" selected="true">Pair of datasets</option>
55 label="Filter and sort the microsatellites" checked="True" 72 <option value="collection">Dataset collection pair</option>
56 help="Filter pal_finder results to only include lines with primer sequences and remove non-perfect repeats" /> 73 </param>
74 <when value="pair_of_files">
75 <param name="input_fastq_r1" type="data" format="fastqsanger"
76 label="Illumina fastq file (read 1)" />
77 <param name="input_fastq_r2" type="data" format="fastqsanger"
78 label="Illumina fastq file (read 2)" />
79 </when>
80 <when value="collection">
81 <param name="input_fastq_pair" format="fastqsanger"
82 type="data_collection" collection_type="paired"
83 label="Select FASTQ dataset collection with R1/R2 pair" />
84 </when>
85 </conditional>
86 <param name="filters" type="select" display="checkboxes"
87 multiple="True" label="Filters to apply to the pal_finder results"
88 help="Apply none, one or more filters to refine results">
89 <option value="-primers" selected="True">Only include loci with designed primers</option>
90 <option value="-occurrences" selected="True">Exclude loci where the primer sequences occur more than once in the reads</option>
91 <option value="-rankmotifs" selected="True">Only include loci with 'perfect' motifs, and rank by motif size</option>
92 </param>
93 <param name="assembly" type="boolean"
94 checked="True" truevalue="-assembly" falsevalue=""
95 label="Use PANDAseq to assemble paired-end reads and confirm primer sequences are present in high-quality assembly" />
57 </when> 96 </when>
58 <when value="454"> 97 <when value="454">
59 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" /> 98 <param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" />
60 </when> 99 </when>
61 </conditional> 100 </conditional>
115 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False" 154 <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False"
116 label="Output the config file to the history" 155 label="Output the config file to the history"
117 help="Can be used to run pal_finder outside of Galaxy" /> 156 help="Can be used to run pal_finder outside of Galaxy" />
118 </inputs> 157 </inputs>
119 <outputs> 158 <outputs>
120 <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix} (microsatellite types)" /> 159 <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: all microsatellites (full details)" />
121 <data name="output_pal_summary" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix} (microsatellites with read IDs and primer pairs)" /> 160 <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: filtered microsatellites (full details)">
122 <data name="output_filtered_microsats" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix} (filtered and sorted microsatellites)"> 161 <filter>platform['platform_type'] == 'illumina' and platform['filters'] is not None</filter>
123 <filter>platform['platform_type'] == 'illumina' and platform['filter_microsats']</filter>
124 </data> 162 </data>
125 <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix} (config file)"> 163 <data name="output_microsat_summary" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: summary of microsatellite types" />
164 <data name="output_assembly" format="tabular" label="${tool.name} on ${on_string} for ${primer_prefix}: assembly">
165 <filter>platform['assembly'] is True</filter>
166 </data>
167 <data name="output_config_file" format="txt" label="${tool.name} on ${on_string} for ${primer_prefix}: config file">
126 <filter>keep_config_file is True</filter> 168 <filter>keep_config_file is True</filter>
127 </data> 169 </data>
128 </outputs> 170 </outputs>
129 <tests> 171 <tests>
130 <test> 172 <test>
131 <!-- Test with Illumina input --> 173 <!-- Test with Illumina input -->
132 <param name="platform_type" value="illumina" /> 174 <param name="platform_type" value="illumina" />
133 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" /> 175 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
134 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" /> 176 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
135 <!--
136 **NB** outputs have to be specified in order that they appear in the
137 tool (which is the order they will be written to the history) - the
138 test framework seems to use the order and ignores the "name" attribute
139 -->
140 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" /> 177 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
141 <output name="output_pal_summary" file="illuminaPE_microsats.out" /> 178 <output name="output_pal_summary" file="illuminaPE_microsats.out" />
142 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" /> 179 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" />
180 <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" />
181 </test>
182 <test>
183 <!-- Test with Illumina input as dataset pair -->
184 <param name="platform_type" value="illumina" />
185 <param name="paired_input_type" value="collection" />
186 <param name="input_fastq_pair">
187 <collection type="paired">
188 <element name="forward" value="illuminaPE_r1.fq" ftype="fastqsanger" />
189 <element name="reverse" value="illuminaPE_r2.fq" ftype="fastqsanger" />
190 </collection>
191 </param>
192 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
193 <output name="output_pal_summary" file="illuminaPE_microsats.out" />
194 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats.out" />
195 <output name="output_assembly" file="illuminaPE_assembly_after_filters.out" />
196 </test>
197 <test>
198 <!-- Test with Illumina input filter to loci with PandaSEQ assembly
199 ('-assembly' option) -->
200 <param name="platform_type" value="illumina" />
201 <param name="filters" value="" />
202 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
203 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
204 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
205 <output name="output_pal_summary" file="illuminaPE_microsats.out" />
206 <output name="output_assembly" file="illuminaPE_assembly.out" />
207 </test>
208 <test>
209 <!-- Test with Illumina input filter to loci with primers
210 ('-primers' option) -->
211 <param name="platform_type" value="illumina" />
212 <param name="filters" value="-primers" />
213 <param name="assembly" value="false" />
214 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
215 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
216 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
217 <output name="output_pal_summary" file="illuminaPE_microsats.out" />
218 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_primers.out" />
219 </test>
220 <test>
221 <!-- Test with Illumina input filter to loci which appear only once
222 ('-occurrences' option) -->
223 <param name="platform_type" value="illumina" />
224 <param name="filters" value="-occurrences" />
225 <param name="assembly" value="false" />
226 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
227 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
228 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
229 <output name="output_pal_summary" file="illuminaPE_microsats.out" />
230 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_occurrences.out" />
231 </test>
232 <test>
233 <!-- Test with Illumina input filter and rank loci with perfect motifs
234 ('-rankmotifs' option) -->
235 <param name="platform_type" value="illumina" />
236 <param name="filters" value="-rankmotifs" />
237 <param name="assembly" value="false" />
238 <param name="input_fastq_r1" value="illuminaPE_r1.fq" ftype="fastqsanger" />
239 <param name="input_fastq_r2" value="illuminaPE_r2.fq" ftype="fastqsanger" />
240 <output name="output_microsat_summary" file="illuminaPE_microsat_types.out" />
241 <output name="output_pal_summary" file="illuminaPE_microsats.out" />
242 <output name="output_filtered_microsats" file="illuminaPE_filtered_microsats_rankmotifs.out" />
143 </test> 243 </test>
144 <test> 244 <test>
145 <!-- Test with 454 input --> 245 <!-- Test with 454 input -->
146 <param name="platform_type" value="454" /> 246 <param name="platform_type" value="454" />
147 <param name="input_fasta" value="454_in.fa" ftype="fasta" /> 247 <param name="input_fasta" value="454_in.fa" ftype="fasta" />
148 <!--
149 **NB** outputs have to be specified in order that they appear in the
150 tool (which is the order they will be written to the history) - the
151 test framework seems to use the order and ignores the "name" attribute
152 -->
153 <output name="output_microsat_summary" file="454_microsat_types.out" /> 248 <output name="output_microsat_summary" file="454_microsat_types.out" />
154 <output name="output_pal_summary" file="454_microsats.out" /> 249 <output name="output_pal_summary" file="454_microsats.out" />
155 </test> 250 </test>
156 </tests> 251 </tests>
157 <help> 252 <help>
161 256
162 This tool runs the pal_finder program, which finds microsatellite repeat elements 257 This tool runs the pal_finder program, which finds microsatellite repeat elements
163 directly from raw 454 or Illumina paired-end sequencing reads. It then designs PCR 258 directly from raw 454 or Illumina paired-end sequencing reads. It then designs PCR
164 primers to amplify these repeat loci (Potentially Amplifiable Loci: PAL). 259 primers to amplify these repeat loci (Potentially Amplifiable Loci: PAL).
165 260
166 Optionally for Illumina data, the output from pal_finder can also be filtered to 261 Optionally for Illumina data, one or more filters can be applied to the output from
167 remove any motifs without primer sequences, and with non-perfect microsatellites. 262 pal_finder to:
168 The microsatellites are then ranked by motif size (largest to smallest). 263
264 * Only include loci with designed primers
265 * Exclude loci where the primer sequences occur more than once in the reads
266 * Only include loci with 'perfect' motifs (and rank by motif size, largest to
267 smallest)
268 * Use PANDAseq to assemble paired-end reads and confirm primer sequences are
269 present in high-quality assembly
169 270
170 Pal_finder runs the primer3_core program; information on the settings used in 271 Pal_finder runs the primer3_core program; information on the settings used in
171 primer3_core can be found in the Primer3 manual at 272 primer3_core can be found in the Primer3 manual at
172 http://primer3.sourceforge.net/primer3_manual.htm 273 http://primer3.sourceforge.net/primer3_manual.htm
173 274
197 Protocols: Methods in Molecular Biology. Humana Press, Totowa, NJ, pp 365-386 298 Protocols: Methods in Molecular Biology. Humana Press, Totowa, NJ, pp 365-386
198 299
199 The paper is available at 300 The paper is available at
200 http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf 301 http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf
201 302
202 The filtering and sorting of the pal_finder output for Illumina data is performed 303 The filtering and assembly of the pal_finder output for Illumina data is performed
203 using a Perl script written by Graeme Fox at the University of Manchester, and which 304 using a Python utility written by Graeme Fox at the University of Manchester, and which
204 is included with this tool. 305 is included with this tool; this utility uses the BioPython and PANDAseq packages.
205 306
206 Please kindly acknowledge this Galaxy tool, the pal_finder and primer3 packages, and 307 Please kindly acknowledge this Galaxy tool, the pal_finder and primer3 packages, and
207 the utility script if you use it in your work. 308 the utility script and its dependencies if you use it in your work.
208 </help> 309 </help>
209 <citations> 310 <citations>
210 <!-- 311 <!--
211 See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set 312 See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
212 Can be either DOI or Bibtex 313 Can be either DOI or Bibtex
213 Use http://www.bioinformatics.org/texmed/ to convert PubMed to Bibtex 314 Use http://www.bioinformatics.org/texmed/ to convert PubMed to Bibtex
214 --> 315 -->
215 <citation type="doi">10.1371/journal.pone.0030953</citation> 316 <citation type="doi">10.1371/journal.pone.0030953</citation>
216 <citation type="bibtex">@Article{pmid10547847, 317 <citation type="bibtex">@Article{pmid10547847,
217 Author="Rozen, S. and Skaletsky, H. ", 318 Author="Rozen, S. and Skaletsky, H. ",
218 Title="{{P}rimer3 on the {W}{W}{W} for general users and for biologist programmers}", 319 Title="{{P}rimer3 on the {W}{W}{W} for general users and for biologist programmers}",
219 Journal="Methods Mol. Biol.", 320 Journal="Methods Mol. Biol.",
220 Year="2000", 321 Year="2000",
221 Volume="132", 322 Volume="132",
222 Pages="365--386", 323 Pages="365--386",
223 URL="{http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf}" 324 URL="{http://purl.com/STEVEROZEN/papers/rozen-and-skaletsky-2000-primer3.pdf}"
224 }</citation> 325 }</citation>
326 <citation type="doi">10.1093/bioinformatics/btp163</citation>
327 <citation type="doi">10.1186/1471-2105-13-31</citation>
225 </citations> 328 </citations>
226 </tool> 329 </tool>