annotate ffp_phylogeny.xml @ 3:79a4a86981d3 draft default tip

Uploaded
author damion
date Thu, 23 Apr 2015 17:47:39 -0400
parents 671667722d3d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
79a4a86981d3 Uploaded
damion
parents: 2
diff changeset
1 <tool id="ffp_phylogeny" name="Feature Frequency Profile Phylogeny" version="0.1.04">
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
2 <description>An alignment free comparison tool for phylogenetic analysis and text comparison</description>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
3 <requirements>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
4 <requirement type="package" version="0.3.19_d4382db015acec0e5cc43d6c1ac80ae12cb7e6b3">ffp-phylogeny</requirement>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
5 </requirements>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
6
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
7 <macros>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
8 <token name="@BINARY@">./ffp_phylogeny.py</token>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
9 <import>ffp_macros.xml</import>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
10 </macros>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
11 <expand macro="requirements" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
12 <command interpreter="python"><![CDATA[
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
13 ffp_phylogeny.py
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
14 #for $i in $sequence.filesin
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
15 "$i" ## full file paths
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
16 #end for
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
17 -x "
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
18 #for $i in $sequence.filesin
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
19 $i.name, ## original file names
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
20 #end for
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
21 "
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
22 -t "$(sequence.file_type.split('-')[0])"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
23 -l "$length"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
24 -o "$info"
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
25 ##if $normalize
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
26 ## -n
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
27 ##end if
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
28 #if $sequence.file_type != 'text'
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
29 #if $sequence.file_type == 'amino-multi' or $sequence.file_type == 'nucleotide-multi'
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
30 -m
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
31 #end if
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
32 #if $sequence.groupings
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
33 #pass
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
34 #else
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
35 -d
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
36 #end if
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
37 #if $metric
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
38 -M "$metric"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
39 #end if
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
40 #if $similarity
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
41 -s
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
42 #end if
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
43 #if $abbreviate
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
44 -a
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
45 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
46 #end if
3
79a4a86981d3 Uploaded
damion
parents: 2
diff changeset
47 #if $phylogeny.phylo_type == 'filt'
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
48 -f "$phylogeny.filt.filter_type"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
49 -L "$phylogeny.filt.lower"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
50 -U "$phylogeny.filt.upper"
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
51 #end if
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
52 #if $tree
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
53 -T
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
54 #end if
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
55 ##ffpjsd -n FLOAT , --normval=FLOAT
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
56 ## For option -e, --euclid, change the n-norm distance (Default is n=2) to any other value where n > 1
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
57
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
58 ]]></command>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
59 <expand macro="stdio" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
60 <inputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
61
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
62 <!-- Either amino acid or nucleotide input -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
63 <!-- Ideally we could determine from file content or suffix what type it is -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
64
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
65 <param name="length" type="integer" min="1" max="25" label="l-mer length" value="6" help="String of valid characters of this length will be counted. Synonyms: feature, k-mer, n-gram, k-tuple" size="2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
66 <!--
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
67 <param name="normalize" label="Normalize counts into relative frequency" type="boolean" checked="true" help="" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
68 -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
69 <conditional name="sequence">
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
70 <param type="select" name="file_type" label="File type" help="Note: For phylogeny display, at least three profiles are required.">
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
71 <option value="amino">Amino Acids, one profile per file</option>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
72 <option value="amino-multi">Amino Acids, one profile per fasta sequence in file</option>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
73 <option value="nucleotide">Nucleic acids, one profile per file</option>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
74 <option value="nucleotide-multi">Nucleic acids, one profile per fasta sequence in file</option>
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
75 <option value="text">Text, single file</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
76 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
77
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
78 <when value="amino"><!-- ffpaa -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
79 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
80 <param name="groupings" label="Enable amino acid grouping" type="boolean" checked="true" help="Counts amino acids in groups rather than individually (usually advantageous, see below)." />
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
81 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
82
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
83 <when value="amino-multi">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
84 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
85 <param name="groupings" label="Enable amino acid grouping" type="boolean" checked="true" help="Counts amino acids in groups rather than individually (usually advantageous, see below)." />
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
86 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
87
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
88 <when value="nucleotide"><!-- ffpry -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
89 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
90 <param name="groupings" label="Enable purine / pyrimidine grouping" type="boolean" checked="true" help="Counts each nucleotide as a purine(R) or pyrimidine(Y) rather than individually (usually advantageous)." />
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
91 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
92
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
93 <when value="nucleotide-multi">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
94 <param name="filesin" type="data" label="Select input file(s)" format="fasta" multiple="true" />
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
95 <param name="groupings" label="Enable purine / pyrimidine grouping" type="boolean" checked="true" help="Counts each nucleotide as a purine(R) or pyrimidine(Y) rather than individually (usually advantageous)." />
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
96 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
97
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
98 <when value="text"><!-- ffptxt -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
99 <param name="filesin" type="data" multiple="true"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
100 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
101
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
102
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
103 </conditional>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
104
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
105 <conditional name="phylogeny">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
106 <param type="select" name="phylo_type" label="Feature filtering">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
107 <option value="all">Include all features (Phenetic phylogeny)</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
108 <option value="filt">Include only filtered features (Core/evolutionary phylogeny) </option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
109 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
110 <when value="all"></when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
111 <when value="filt">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
112 <conditional name="filt">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
113
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
114 <param type="select" name="filter_type" label="Filter type" help="Features are included in profiles if at least 1 profile has lower count/percent, and no profile has more than upper count/percent">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
115 <option value="count">lower / upper count limit</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
116 <option value="f">raw frequencies</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
117 <option value="n">normal distribution</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
118 <option value="e">extreme value (Gumbel) distribution</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
119 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
120 <when value="count">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
121 <param name="lower" type="integer" label="lower count (one profile needs at least this)" value="0" min="0" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
122 <param name="upper" type="integer" label="upper count (no profile can have more than this)" value="0" min="0" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
123 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
124 <when value="f">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
125 <param name="lower" type="float" label="lower &#37;" value="0.05" min="0" max=".5" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
126 <param name="upper" type="float" label="upper &#37;" value="0.95" min=".5" max="1" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
127 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
128 <when value="n">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
129 <param name="lower" type="float" label="lower &#37;" value="0.05" min="0" max=".5" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
130 <param name="upper" type="float" label="upper &#37;" value="0.95" min=".5" max="1" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
131 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
132 <when value="e">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
133 <param name="lower" type="float" label="lower &#37;" value="0.05" min="0" max=".5" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
134 <param name="upper" type="float" label="upper &#37;" value="0.95" min=".5" max="1" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
135 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
136
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
137 </conditional>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
138 </when>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
139
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
140 </conditional>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
141
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
142 <param type="select" name="metric" label="Continuous Distance Measure" help="See ffpjsd documentation for details.">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
143 <option value="" selected="true">Jensen Shannon divergence (default)</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
144 <option value="euclid">Euclidean</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
145 <option value="euclid2">Euclidean squared</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
146 <option value="cosine">Cosine</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
147 <option value="manhattan">Manhattan</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
148 <option value="pearson">pearson correlation coefficient*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
149 <option value="chebyshev">Chebyshev</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
150 <option value="canberra">Canberra</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
151 <option value="hamming">Hamming</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
152 <option value="evol">Evolutionary Distance used in E.coli Publications</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
153
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
154 <!--
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
155
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
156 With these options the input FFPs are treated as binary data. When two FFPs (i and j) are compared each
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
157 distance measure uses a cross tabulation for pairwise feature comparison with sums A, B, C and D. A is
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
158 the number of features which are present in both vectors while D is the number of features that are absent in
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
159 both vectors. B means the feature is present in i and absent in j. C means the feature is absent in i but
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
160 present in j. N is the sum of A+B+C+D. All of the binary distance options can be used together with the -s
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
161 option to print a similarity matrix. The binary distances do not need to be normalized with ffprwn.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
162
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
163 <option value="">BINARY DISTANCE MEASURES</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
164
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
165 -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
166
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
167 <option value="matching">matching*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
168 <option value="jaccard">Jaccard*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
169 <option value="tanimoto">Rogers-Tanimoto*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
170 <option value="dice">Dice*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
171 <option value="antidice">anti-Dice*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
172 <option value="sneath">Sneath-Sokal*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
173 <option value="hamman">Hamman*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
174 <option value="phi">Pearson Phi*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
175 <option value="anderberg">Anderberg*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
176 <option value="gower">Gower*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
177 <option value="russel">Russel-Rao*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
178 <option value="yule">Yule*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
179 <option value="ochiai">Ochiai*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
180 <option value="kulczynski">Kulczynski*</option>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
181
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
182 </param>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
183
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
184 <param type="boolean" name="similarity" label="*Similarity Matrix" help="Print a similarity matrix rather than a distance matrix for items marked by asterisk(*). This option affects the output of distance metrics which have a value normalized from 0 to 1 or -1 to 1."/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
185
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
186 <param type="boolean" name="abbreviate" label="Short labels" help="Shorten tree taxonomy labels as much as possible."/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
187
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
188 <param type="boolean" name="tree" checked="true" label="Generate Tree Phylogeny" truevalue="1" falsevalue="0" />
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
189
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
190 </inputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
191 <outputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
192 <data name="info" format="nhx" label="Feature Frequency Profile">
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
193 <change_format>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
194 <when input="tree" value="0" format="tabular"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
195 </change_format>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
196 <!-- doesn't work: filter>tree == "1"</filter -->
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
197 </data>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
198 </outputs>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
199
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
200 <tests>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
201 <test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
202 <param name="length" value="1"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
203 <param name="tree" value="0"/>
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
204 <param name="groupings" value="false"/>
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
205 <param name="file_type" value="nucleotide"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
206 <param name="filesin" value="genome1,genome2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
207 <output name="info" file="test_length_1_output.tabular"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
208 </test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
209 <test>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
210 <param name="length" value="2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
211 <param name="tree" value="0"/>
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
212 <param name="groupings" value="false"/>
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
213 <param name="file_type" value="nucleotide"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
214 <param name="filesin" value="genome1,genome2"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
215 <output name="info" file="test_length_2_output.tabular"/>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
216 </test>
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
217 <test>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
218 <param name="length" value="2"/>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
219 <param name="tree" value="0"/>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
220 <param name="groupings" value="true"/>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
221 <param name="file_type" value="nucleotide-multi"/>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
222 <param name="filesin" value="genome1,genome2"/>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
223 <output name="info" file="test_length_2b_output.tabular"/>
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
224 </test>
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
225 </tests>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
226
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
227 <help><![CDATA[
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
228
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
229 .. class:: infomark
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
230
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
231
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
232 **What it does**
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
233
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
234 FFP (Feature frequency profile) is an alignment free comparison tool for phylogenetic analysis and text comparison. It can be applied to nucleotide sequences, complete genomes, proteomes and even used for text comparison.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
235
1
d1c88b118a3f Uploaded
damion
parents: 0
diff changeset
236 This galaxy tool prepares a mini-pipeline consisting of **[ffpry | ffpaa | ffptxt] > [ ffpfilt | ffpcol > ffprwn] > ffpjsd > ffptree** . The last step is optional - by deselecting the "Generate Tree Phylogeny" checkbox, the tool will output only the precursor distance matrix file rather than a Newick (.nhx) formatted tree file.
0
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
237
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
238 Each sequence or text file has a profile containing tallies of each feature found. A feature is a string of valid characters of given length.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
239
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
240 For nucleotide data, by default each character (ATGC) is grouped as either purine(R) or pyrimidine(Y) before being counted.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
241
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
242 For amino acid data, by default each character is grouped into one of the following:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
243 (ST),(DE),(KQR),(IVLM),(FWY),C,G,A,N,H,P. Each group is represented by the first character in its series.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
244
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
245 One other key concept is that a given feature, e.g. "TAA" is counted in forward
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
246 AND reverse directions, mirroring the idea that a feature's orientation is not
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
247 so important to distinguish when it comes to alignment-free comparison.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
248 The counts for "TAA" and "AAT" are merged.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
249
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
250 The labeling of the resulting counted feature items is perhaps the trickiest
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
251 concept to master. Due to computational efficiency measures taken by the
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
252 developers, a feature that we see on paper as "TAC" may be stored and labeled
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
253 internally as "GTA", its reverse complement. One must look for the alternative
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
254 if one does not find the original.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
255
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
256 Also note that in amino acid sequences the stop codon "*" (or any other character
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
257 that is not in the Amino acid alphabet) causes that character frame not to be
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
258 counted. Also, character frames never span across fasta entries.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
259
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
260 A few tutorials:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
261 * http://sourceforge.net/projects/ffp-phylogeny/files/Documentation/tutorial.pdf
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
262 * https://github.com/apetkau/microbial-informatics-2014/tree/master/labs/ffp-phylogeny
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
263
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
264 -------
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
265
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
266 .. class:: warningmark
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
267
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
268 **Note**
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
269
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
270 Taxonomy label details: If each file contains one profile, the file's name is used to label the profile.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
271 If each file contains fasta sequences to profile individually, their fasta identifiers will be used to label them.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
272 The "short labels" option will find the shortest label that uniquely identifies each profile.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
273 Either way, there are some quirks: ffpjsd clips labels to 10 characters if they are greater than 50 characters, so all labels are trimmed to 50 characters first.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
274 Also "id" is prefixed to any numeric label since some tree visualizers won't show purely numeric labels.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
275 In the accidental case where a Fasta sequence label is a duplicate of a previous one it will be prefixed by "DupLabel-".
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
276
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
277 The command line ffpjsd can hang if one provides an l-mer length greater than the length of file content.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
278 One must identify its process id (">ps aux | grep ffpjsd") and kill it (">kill [process id]").
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
279 -------
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
280
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
281 **References**
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
282
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
283 The original ffp-phylogeny code is at http://ffp-phylogeny.sourceforge.net/ .
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
284 This tool uses Aaron Petkau's modified version: https://github.com/apetkau/ffp-3.19-custom .
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
285
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
286 The development of ffp-phylogeny should be attributed to:
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
287
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
288 Sims GE, Jun S-R, Wu GA, Kim S-H. Alignment-free genome comparison with feature frequency profiles (FFP) and optimal resolutions. Proceedings of the National Academy of Sciences of the United States of America 2009;106(8):2677-2682. doi:10.1073/pnas.0813249106.
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
289
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
290 ]]></help>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
291 </tool>
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
292
eb6e5e78a066 Uploaded
damion
parents:
diff changeset
293