annotate weeder2_wrapper.xml @ 3:f19e18ab01b1 draft

Uploaded v2.0.2 (use conda for dependency resolution)
author pjbriggs
date Mon, 05 Mar 2018 10:19:50 -0500
parents 3c5f10f7dd40
children 89315bdc1a8c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
1 <tool id="motiffinding_weeder2" name="Weeder2" version="2.0.2">
0
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
2 <description>Motif discovery in sequences from coregulated genes of a single species</description>
3
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
3 <macros>
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
4 <import>weeder2_macros.xml</import>
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
5 </macros>
2
3c5f10f7dd40 Updated to tool version 2.0.1 (use data table to locate freqfiles).
pjbriggs
parents: 1
diff changeset
6 <requirements>
3c5f10f7dd40 Updated to tool version 2.0.1 (use data table to locate freqfiles).
pjbriggs
parents: 1
diff changeset
7 <requirement type="package" version="2.0">weeder</requirement>
3c5f10f7dd40 Updated to tool version 2.0.1 (use data table to locate freqfiles).
pjbriggs
parents: 1
diff changeset
8 </requirements>
3
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
9 <command><![CDATA[
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
10 @CONDA_WEEDER2_FREQFILES_PATH@ &&
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
11 bash $__tool_directory__/weeder2_wrapper.sh
2
3c5f10f7dd40 Updated to tool version 2.0.1 (use data table to locate freqfiles).
pjbriggs
parents: 1
diff changeset
12 $sequence_file $species_code ${species_code.fields.path}
0
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
13 $output_motifs_file $output_matrix_file
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
14 $strands
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
15 #if $chipseq.use_chipseq
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
16 -chipseq -top $chipseq.top
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
17 #end if
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
18 #if str( $advanced_options.advanced_options_selector ) == "on"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
19 -maxm $advanced_options.n_motifs_report
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
20 -b $advanced_options.n_motifs_build
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
21 -sim $advanced_options.sim_threshold
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
22 -em $advanced_options.em_cycles
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
23 #end if
3
f19e18ab01b1 Uploaded v2.0.2 (use conda for dependency resolution)
pjbriggs
parents: 2
diff changeset
24 ]]></command>
0
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
25 <inputs>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
26 <param name="sequence_file" type="data" format="fasta" label="Input sequence" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
27 <param name="species_code" type="select" label="Species to use for background comparison">
2
3c5f10f7dd40 Updated to tool version 2.0.1 (use data table to locate freqfiles).
pjbriggs
parents: 1
diff changeset
28 <options from_data_table="weeder2">
3c5f10f7dd40 Updated to tool version 2.0.1 (use data table to locate freqfiles).
pjbriggs
parents: 1
diff changeset
29 </options>
0
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
30 </param>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
31 <param name="strands" label="Use both strands of sequence" type="boolean"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
32 truevalue="" falsevalue="-ss" checked="True"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
33 help="If not checked then use -ss option" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
34 <conditional name="chipseq">
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
35 <param name="use_chipseq" type="boolean"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
36 label="Use the ChIP-seq heuristic"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
37 help="Speeds up the computation (-chipseq)"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
38 truevalue="yes" falsevalue="no" checked="on" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
39 <when value="yes">
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
40 <param name="top" type="integer" value="100"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
41 label="Number of top input sequences with oligos to scan for"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
42 help="Increase this value to improve the chance of finding motifs enriched only in a subset of your input sequences (-top)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
43 </when>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
44 <when value="no"></when>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
45 </conditional>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
46 <conditional name="advanced_options">
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
47 <param name="advanced_options_selector" type="select"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
48 label="Display advanced options">
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
49 <option value="off">Hide</option>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
50 <option value="on">Display</option>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
51 </param>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
52 <when value="on">
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
53 <param name="n_motifs_report" type="integer" value="25"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
54 label="Number of discovered motifs to report" help="(-maxm)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
55 <param name="n_motifs_build" type="integer" value="50"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
56 label="Number of top scoring motifs to build occurrences matrix profiles and outputs for"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
57 help="(-b)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
58 <param name="sim_threshold" type="float" min="0.0" max="1.0" value="0.95"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
59 label="Similarity threshold for the redundancy filter"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
60 help="Remove motifs that are too similar, with lower values imposing a stricter filter. Must be between 0.0 and 1.0 (-sim)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
61 <param name="em_cycles" type="integer" min="0" max="100" value="1"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
62 label="Number of expectation maximization (EM) cycles to perform"
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
63 help="Number of cycles must be between 0 and 100 (-em)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
64 </when>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
65 <when value="off">
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
66 </when>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
67 </conditional>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
68 </inputs>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
69 <outputs>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
70 <data name="output_motifs_file" format="txt" label="Weeder2 on ${on_string} (motifs)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
71 <data name="output_matrix_file" format="txt" label="Weeder2 on ${on_string} (matrix)" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
72 </outputs>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
73 <tests>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
74 <test>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
75 <param name="sequence_file" value="weeder_in.fa" ftype="fasta" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
76 <param name="species_code" value="MM" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
77 <output name="output_motifs_file" file="weeder2_motifs.out" lines_diff="2" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
78 <output name="output_matrix_file" file="weeder2_matrix.out" />
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
79 </test>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
80 </tests>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
81 <help>
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
82
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
83 .. class:: infomark
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
84
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
85 **What it does**
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
86
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
87 Weeder2 is a program for finding novel motifs (transcription factor binding sites)
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
88 conserved in a set of regulatory regions of related genes.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
89
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
90 -------------
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
91
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
92 .. class:: infomark
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
93
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
94 **Usage advice**
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
95
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
96 Guidelines on how to use this tool can be seen in Zambelli et al. 2014 (see link
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
97 below), but the following is a brief guide. Please note that **motifs** are a model
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
98 or matrix that describes a set of sequences that may differ in the base composition.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
99 **Oligos** are specific sequences found within the input sequences or genomic
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
100 background.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
101
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
102 **Input sequence** (in FASTA format) should be short (100-200bp) and be reasonably
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
103 expected to contain an enriched motif(s). This is not generally an issue with
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
104 transcription factor ChIP-seq derived sequences centred on the summit of binding
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
105 regions that are expected to contain a dominant motif and possibly secondary motifs.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
106
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
107 There is **no need to mask sequence for repetitive sequence** as factors may
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
108 legitimately bind repetitive sequence.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
109
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
110 **Use both strands of sequence** by default, unless there is a specific reason not
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
111 to do so.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
112
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
113 **Species to use for background comparison** should match the genome used to
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
114 generate the **input sequence**. The background genome motif frequencies are
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
115 generated from within the promoter regions of annotated genes and are shown to be a
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
116 good background for both promoter and other regulatory regions.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
117
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
118 **Use the ChIP-seq heuristic** (-chipseq) when there are a large number of
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
119 input sequences (hundreds or thousands). When -chipseq is used Weeder will use
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
120 only oligos from the first 100 sequences to build motifs with which it scans
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
121 all of the input sequences. This speeds up the computational time without too much
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
122 risk of losing important motifs. Even if not strictly necessary it's advisable to
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
123 order input sequences by their significance, e.g. fold enrichment or p-value. For
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
124 large data sets, -top should be set to a number equal to at least 10 to 20% of
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
125 input sequences (as recommended by the authors).
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
126
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
127 **Number of discovered motifs to report** (-maxm) limits the number of reported
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
128 motifs even if there are more than -maxm. **Number of top scoring motifs to build
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
129 occurrences matrix profiles and outputs for** (-b) changes the number of top
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
130 scoring motifs of length 6, 8 and 10 for which the occurrence matrix is built.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
131 Increasing -b may result in a larger number of reported motifs, but with potentially
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
132 more of low significance and increases the computational time. If increasing -b does
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
133 not result in more motifs in your results it means that the additional motifs are
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
134 filtered out by the redundancy filter or that the maximum number of reported motifs
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
135 set by -maxm has been reached.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
136
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
137 **Similarity threshold for the redundancy filter** (-sim) default setting is
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
138 recommended.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
139
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
140 **Number of expectation maximization (EM) cycles to perform** (-em) default is
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
141 recommended. The option is included to help "clean up" the resulting motif matrices.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
142 In this version the number of EM steps can be increased, which can be useful for
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
143 motifs with highly redundant stretches of sequence.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
144
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
145 -------------
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
146
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
147 .. class:: infomark
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
148
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
149 **A note on the results**
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
150
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
151 The resulting matrices are the result of scanning (by default both strands) for
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
152 oligos of length 6, 8 and 10, allowing 1, 2 and 3 substitutions respectively. The
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
153 matrices within the matrix.w2 file can be input into other tools. The recommended
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
154 next step is to use **STAMP** (http://www.benoslab.pitt.edu/stamp/), which displays
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
155 the motifs as logos and identifies matches with libraries of known DNA binding
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
156 motifs, such as TRANSFAC or JASPAR.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
157
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
158 -------------
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
159
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
160 .. class:: infomark
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
161
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
162 **Credits**
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
163
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
164 This Galaxy tool has been developed by Peter Briggs and Ian Donaldson within the
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
165 Bioinformatics Core Facility at the University of Manchester, and runs the Weeder2
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
166 motif discovery package:
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
167
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
168 * Zambelli, F., Pesole, G. and Pavesi, G. 2014. Using Weeder, Pscan, and PscanChIP
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
169 for the Discovery of Enriched Transcription Factor Binding Site Motifs in
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
170 Nucleotide Sequences. Current Protocols in Bioinformatics. 47:2.11:2.11.1–2.11.31.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
171 * http://onlinelibrary.wiley.com/doi/10.1002/0471250953.bi0211s47/full
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
172
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
173 This tool is compatible with Weeder 2.0:
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
174
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
175 * http://159.149.160.51/modtools/downloads/weeder2.html
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
176
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
177 Please kindly acknowledge this Galaxy tool, the Weeder package and the utility
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
178 scripts if you use it in your work.
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
179 </help>
1
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
180 <citations>
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
181 <!--
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
182 See https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
183 Can be either DOI or Bibtex
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
184 Use http://www.bioinformatics.org/texmed/ to convert PubMed to Bibtex
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
185 -->
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
186 <citation type="doi">10.1002/0471250953.bi0211s47</citation>
571cb77ab9e7 Add citation tag and README file.
pjbriggs
parents: 0
diff changeset
187 </citations>
0
496bc4eff47e Initial version.
pjbriggs
parents:
diff changeset
188 </tool>