annotate ffp_phylogeny.xml @ 0:eb6e5e78a066 draft

Uploaded
author damion
date Mon, 23 Feb 2015 18:25:25 -0500
parents
children d1c88b118a3f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
1 <tool id="ffp_phylogeny" name="Feature Frequency Profile Phylogeny" version="0.1.00">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
2 <description>An alignment free comparison tool for phylogenetic analysis and text comparison</description>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
3 <requirements>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
4 <requirement type="package" version="0.3.19_d4382db015acec0e5cc43d6c1ac80ae12cb7e6b3">ffp-phylogeny</requirement>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
5 </requirements>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
6
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
7 <macros>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
8 <token name="@BINARY@">./ffp_phylogeny.py</token>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
9 <import>ffp_macros.xml</import>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
10 </macros>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
11 <expand macro="requirements" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
12 <command interpreter="python"><![CDATA[
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
13 ffp_phylogeny.py
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
14 #for $i in $sequence.filesin
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
15 "$i" ## full file paths
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
16 #end for
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
17 -x "
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
18 #for $i in $sequence.filesin
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
19 $i.name, ## original file names
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
20 #end for
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
21 "
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
22 -t "$(sequence.file_type.split('-')[0])"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
23 -l "$length"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
24 -o "$info"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
25 ##if $normalize:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
26 ## -n
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
27 ##end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
28 #if $sequence.file_type != 'text':
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
29 #if $sequence.file_type.find('multi') > 0:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
30 -m
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
31 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
32 #if $sequence.grouping:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
33 -d
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
34 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
35 #if $metric:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
36 -M "$metric"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
37 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
38 #if $similarity:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
39 -s
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
40 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
41 #if $abbreviate:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
42 -a
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
43 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
44 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
45 #if $phylogeny.phylo_type == 'filt':
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
46 -f "$phylogeny.filt.filter_type"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
47 -L "$phylogeny.filt.lower"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
48 -U "$phylogeny.filt.upper"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
49 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
50 #if $tree:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
51 -T
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
52 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
53
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
54 ##ffpjsd -n FLOAT , --normval=FLOAT
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
55 ## For option -e, --euclid, change the n-norm distance (Default is n=2) to any other value where n > 1
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
56
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
57 ]]></command>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
58 <expand macro="stdio" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
59 <inputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
60
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
61 <!-- Either amino acid or nucleotide input -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
62 <!-- Ideally we could determine from file content or suffix what type it is -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
63
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
64 <param name="length" type="integer" min="1" max="25" label="l-mer length" value="6" help="String of valid characters of this length will be counted. Synonyms: feature, k-mer, n-gram, k-tuple" size="2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
65 <!--
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
66 <param name="normalize" label="Normalize counts into relative frequency" type="boolean" checked="true" help="" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
67 -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
68 <conditional name="sequence">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
69 <param type="select" name="file_type" label="File type" help="Note: For phylogeny display, at least three profiles are required, as files or fasta sequences within a file.">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
70 <option value="amino">Amino Acids, one sequence per file</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
71 <option value="amino-multi">Amino Acids, multiple fasta sequences per file</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
72 <option value="nucleotide">Nucleic acids, one sequence per file</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
73 <option value="nucleotide-multi">Nucleic acids, multiple fasta sequences per file</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
74 <option value="text">Text, single file</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
75 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
76
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
77 <when value="amino"><!-- ffpaa -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
78 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
79 <param name="grouping" label="Enable amino acid grouping" type="boolean" checked="true" help="Counts amino acids in groups rather than individually (usually advantageous, see below)." />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
80 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
81
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
82 <when value="amino-multi">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
83 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
84 <param name="grouping" label="Enable amino acid grouping" type="boolean" checked="true" help="Counts amino acids in groups rather than individually (usually advantageous, see below)." />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
85 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
86
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
87 <when value="nucleotide"><!-- ffpry -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
88 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
89 <param name="grouping" label="Enable purine / pyrimidine grouping" type="boolean" checked="true" help="Counts each nucleotide as a purine(R) or pyrimidine(Y) rather than individually (usually advantageous)." />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
90 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
91
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
92 <when value="nucleotide-multi">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
93 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
94 <param name="grouping" label="Enable purine / pyrimidine grouping" type="boolean" checked="true" help="Counts each nucleotide as a purine(R) or pyrimidine(Y) rather than individually (usually advantageous)." />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
95 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
96
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
97 <when value="text"><!-- ffptxt -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
98 <param name="filesin" type="data" multiple="true"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
99 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
100
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
101
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
102 </conditional>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
103
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
104 <conditional name="phylogeny">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
105 <param type="select" name="phylo_type" label="Feature filtering">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
106 <option value="all">Include all features (Phenetic phylogeny)</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
107 <option value="filt">Include only filtered features (Core/evolutionary phylogeny) </option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
108 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
109 <when value="all"></when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
110 <when value="filt">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
111 <conditional name="filt">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
112
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
113 <param type="select" name="filter_type" label="Filter type" help="Features are included in profiles if at least 1 profile has lower count/percent, and no profile has more than upper count/percent">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
114 <option value="count">lower / upper count limit</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
115 <option value="f">raw frequencies</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
116 <option value="n">normal distribution</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
117 <option value="e">extreme value (Gumbel) distribution</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
118 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
119 <when value="count">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
120 <param name="lower" type="integer" label="lower count (one profile needs at least this)" value="0" min="0" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
121 <param name="upper" type="integer" label="upper count (no profile can have more than this)" value="0" min="0" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
122 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
123 <when value="f">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
124 <param name="lower" type="float" label="lower &#37;" value="0.05" min="0" max=".5" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
125 <param name="upper" type="float" label="upper &#37;" value="0.95" min=".5" max="1" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
126 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
127 <when value="n">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
128 <param name="lower" type="float" label="lower &#37;" value="0.05" min="0" max=".5" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
129 <param name="upper" type="float" label="upper &#37;" value="0.95" min=".5" max="1" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
130 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
131 <when value="e">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
132 <param name="lower" type="float" label="lower &#37;" value="0.05" min="0" max=".5" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
133 <param name="upper" type="float" label="upper &#37;" value="0.95" min=".5" max="1" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
134 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
135
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
136 </conditional>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
137 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
138
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
139 </conditional>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
140
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
141 <param type="select" name="metric" label="Continuous Distance Measure" help="See ffpjsd documentation for details.">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
142 <option value="" selected="true">Jensen Shannon divergence (default)</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
143 <option value="euclid">Euclidean</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
144 <option value="euclid2">Euclidean squared</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
145 <option value="cosine">Cosine</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
146 <option value="manhattan">Manhattan</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
147 <option value="pearson">pearson correlation coefficient*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
148 <option value="chebyshev">Chebyshev</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
149 <option value="canberra">Canberra</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
150 <option value="hamming">Hamming</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
151 <option value="evol">Evolutionary Distance used in E.coli Publications</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
152
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
153 <!--
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
154
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
155 With these options the input FFPs are treated as binary data. When two FFPs (i and j) are compared each
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
156 distance measure uses a cross tabulation for pairwise feature comparison with sums A, B, C and D. A is
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
157 the number of features which are present in both vectors while D is the number of features that are absent in
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
158 both vectors. B means the feature is present in i and absent in j. C means the feature is absent in i but
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
159 present in j. N is the sum of A+B+C+D. All of the binary distance options can be used together with the -s
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
160 option to print a similarity matrix. The binary distances do not need to be normalized with ffprwn.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
161
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
162 <option value="">BINARY DISTANCE MEASURES</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
163
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
164 -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
165
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
166 <option value="matching">matching*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
167 <option value="jaccard">Jaccard*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
168 <option value="tanimoto">Rogers-Tanimoto*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
169 <option value="dice">Dice*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
170 <option value="antidice">anti-Dice*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
171 <option value="sneath">Sneath-Sokal*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
172 <option value="hamman">Hamman*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
173 <option value="phi">Pearson Phi*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
174 <option value="anderberg">Anderberg*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
175 <option value="gower">Gower*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
176 <option value="russel">Russel-Rao*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
177 <option value="yule">Yule*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
178 <option value="ochiai">Ochiai*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
179 <option value="kulczynski">Kulczynski*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
180
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
181 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
182
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
183 <param type="boolean" name="similarity" label="*Similarity Matrix" help="Print a similarity matrix rather than a distance matrix for items marked by asterisk(*). This option affects the output of distance metrics which have a value normalized from 0 to 1 or -1 to 1."/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
184
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
185 <param type="boolean" name="abbreviate" label="Short labels" help="Shorten tree taxonomy labels as much as possible."/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
186
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
187 <param type="boolean" name="tree" checked="true" label="Generate Tree Phylogeny" truevalue="1" falsevalue="0" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
188
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
189 </inputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
190 <outputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
191 <data name="info" format="nhx" label="Feature Frequency Profile">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
192 <change_format>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
193 <when input="tree" value="0" format="tabular"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
194 </change_format>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
195 <!-- doesn't work: filter>tree == "1"</filter -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
196 </data>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
197 </outputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
198
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
199 <tests>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
200 <test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
201 <param name="length" value="1"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
202 <param name="tree" value="0"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
203 <param name="grouping" value="true"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
204 <param name="file_type" value="nucleotide"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
205 <param name="filesin" value="genome1,genome2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
206 <output name="info" file="test_length_1_output.tabular"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
207 </test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
208 <test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
209 <param name="length" value="2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
210 <param name="tree" value="0"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
211 <param name="grouping" value="true"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
212 <param name="file_type" value="nucleotide"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
213 <param name="filesin" value="genome1,genome2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
214 <output name="info" file="test_length_2_output.tabular"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
215 </test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
216 </tests>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
217
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
218 <help><![CDATA[
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
219
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
220 .. class:: infomark
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
221
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
222
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
223 **What it does**
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
224
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
225 FFP (Feature frequency profile) is an alignment free comparison tool for phylogenetic analysis and text comparison. It can be applied to nucleotide sequences, complete genomes, proteomes and even used for text comparison.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
226
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
227 This galaxy tool prepares a mini-pipeline consisting of **[ffpry | ffpaa | ffptxt] &gt; [ ffpfilt | ffpcol &gt; ffprwn] &gt; ffpjsd &gt; ffptree** . The last step is optional - by deselecting the "Generate Tree Phylogeny" checkbox, the tool will output a distance matrix rather than a Newick (.nhx) formatted tree file.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
228
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
229 Each sequence or text file has a profile containing tallies of each feature found. A feature is a string of valid characters of given length.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
230
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
231 For nucleotide data, by default each character (ATGC) is grouped as either purine(R) or pyrimidine(Y) before being counted.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
232
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
233 For amino acid data, by default each character is grouped into one of the following:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
234 (ST),(DE),(KQR),(IVLM),(FWY),C,G,A,N,H,P. Each group is represented by the first character in its series.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
235
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
236 One other key concept is that a given feature, e.g. "TAA" is counted in forward
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
237 AND reverse directions, mirroring the idea that a feature&apos;s orientation is not
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
238 so important to distinguish when it comes to alignment-free comparison.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
239 The counts for "TAA" and "AAT" are merged.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
240
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
241 The labeling of the resulting counted feature items is perhaps the trickiest
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
242 concept to master. Due to computational efficiency measures taken by the
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
243 developers, a feature that we see on paper as "TAC" may be stored and labeled
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
244 internally as "GTA", its reverse complement. One must look for the alternative
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
245 if one does not find the original.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
246
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
247 Also note that in amino acid sequences the stop codon "*" (or any other character
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
248 that is not in the Amino acid alphabet) causes that character frame not to be
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
249 counted. Also, character frames never span across fasta entries.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
250
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
251 A few tutorials:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
252 * http://sourceforge.net/projects/ffp-phylogeny/files/Documentation/tutorial.pdf
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
253 * https://github.com/apetkau/microbial-informatics-2014/tree/master/labs/ffp-phylogeny
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
254
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
255 -------
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
256
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
257 .. class:: warningmark
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
258
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
259 **Note**
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
260
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
261 Taxonomy label details: If each file contains one profile, the file's name is used to label the profile.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
262 If each file contains fasta sequences to profile individually, their fasta identifiers will be used to label them.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
263 The "short labels" option will find the shortest label that uniquely identifies each profile.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
264 Either way, there are some quirks: ffpjsd clips labels to 10 characters if they are greater than 50 characters, so all labels are trimmed to 50 characters first.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
265 Also "id" is prefixed to any numeric label since some tree visualizers won't show purely numeric labels.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
266 In the accidental case where a Fasta sequence label is a duplicate of a previous one it will be prefixed by "DupLabel-".
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
267
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
268 The command line ffpjsd can hang if one provides an l-mer length greater than the length of file content.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
269 One must identify its process id (">ps aux | grep ffpjsd") and kill it (">kill [process id]").
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
270 -------
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
271
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
272 **References**
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
273
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
274 The original ffp-phylogeny code is at http://ffp-phylogeny.sourceforge.net/ .
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
275 This tool uses Aaron Petkau's modified version: https://github.com/apetkau/ffp-3.19-custom .
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
276
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
277 The development of ffp-phylogeny should be attributed to:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
278
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
279 Sims GE, Jun S-R, Wu GA, Kim S-H. Alignment-free genome comparison with feature frequency profiles (FFP) and optimal resolutions. Proceedings of the National Academy of Sciences of the United States of America 2009;106(8):2677-2682. doi:10.1073/pnas.0813249106.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
280
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
281 ]]></help>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
282 </tool>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
283
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
284