comparison SMART/galaxy/GetDifferentialExpression.xml @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents
children
comparison
equal deleted inserted replaced
37:d22fadc825e3 38:2c0c0a89fad7
1 <tool id="GetDifferentialExpression" name="get differential expression">
2 <description>Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file.</description>
3 <requirements>
4 <requirement type="set_environment">PYTHONPATH</requirement>
5 </requirements>
6 <!-- Builds the GetDifferentialExpression.py command line. Each format flag (f, g, l) receives the value of its format select directly, because every option value declared in the inputs section (bed, gff, gff2, gff3, sam, gtf) is already the exact string passed to that flag. The optional flags (S, x, d) are emitted only when their Yes/No selector is 'Yes'. --> <command interpreter="python">
7 ../Java/Python/GetDifferentialExpression.py -i $formatType.inputFileName1
8 -f $formatType.FormatInputFileName1
9
10 -j $formatType2.inputFileName2
11 -g $formatType2.FormatInputFileName2
12
13 -k $formatTypeRef.inputFileNameRef
14 -l $formatTypeRef.FormatInputFileNameRef
15
16 -o $outputFileGff
17
18 $simple
19 $adjusted
20
21 #if $optionSimplePara.simplePara == 'Yes':
22 -S $optionSimplePara.paraValue
23 #end if
24
25 #if $optionFixedSizeFactor.FSF == 'Yes':
26 -x $optionFixedSizeFactor.FSFValue
27 #end if
28
29 #if $optionFDR.FDR == 'Yes':
30 -d $optionFDR.FDRValue
31 #end if
32 </command>
69
70 <inputs> <!-- Three format/dataset selectors (sample 1, sample 2, reference regions), two normalization switches, and three optional values that are each gated by a Yes/No select. -->
71 <conditional name="formatType">
72 <param name="FormatInputFileName1" type="select" label="Input File Format 1">
73 <option value="bed">bed</option>
74 <option value="gff">gff</option>
75 <option value="gff2">gff2</option>
76 <option value="gff3">gff3</option>
77 <option value="sam">sam</option>
78 <option value="gtf">gtf</option>
79 </param>
80 <when value="bed">
81 <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
82 </when>
83 <when value="gff">
84 <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
85 </when>
86 <when value="gff2">
87 <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
88 </when>
89 <when value="gff3">
90 <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
91 </when>
92 <when value="sam">
93 <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
94 </when>
95 <when value="gtf">
96 <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
97 </when>
98 </conditional>
99
100 <conditional name="formatType2">
101 <param name="FormatInputFileName2" type="select" label="Input File Format 2">
102 <option value="bed">bed</option>
103 <option value="gff">gff</option>
104 <option value="gff2">gff2</option>
105 <option value="gff3">gff3</option>
106 <option value="sam">sam</option>
107 <option value="gtf">gtf</option>
108 </param>
109 <when value="bed">
110 <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
111 </when>
112 <when value="gff">
113 <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
114 </when>
115 <when value="gff2">
116 <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
117 </when>
118 <when value="gff3">
119 <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
120 </when>
121 <when value="sam">
122 <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
123 </when>
124 <when value="gtf">
125 <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
126 </when>
127 </conditional>
128
129 <conditional name="formatTypeRef">
130 <param name="FormatInputFileNameRef" type="select" label="Input Ref File Format">
131 <option value="bed">bed</option>
132 <option value="gff">gff</option>
133 <option value="gff2">gff2</option>
134 <option value="gff3">gff3</option>
135 <option value="sam">sam</option>
136 <option value="gtf">gtf</option>
137 </param>
138 <when value="bed">
139 <param name="inputFileNameRef" format="bed" type="data" label="Input Ref File"/>
140 </when>
141 <when value="gff">
142 <param name="inputFileNameRef" format="gff" type="data" label="Input Ref File"/>
143 </when>
144 <when value="gff2">
145 <param name="inputFileNameRef" format="gff2" type="data" label="Input Ref File"/>
146 </when>
147 <when value="gff3">
148 <param name="inputFileNameRef" format="gff3" type="data" label="Input Ref File"/>
149 </when>
150 <when value="sam">
151 <param name="inputFileNameRef" format="sam" type="data" label="Input Ref File"/>
152 </when>
153 <when value="gtf">
154 <param name="inputFileNameRef" format="gtf" type="data" label="Input Ref File"/>
155 </when>
156 </conditional>
157
158 <param name="simple" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Normalize using the number of reads in each condition"/>
159 <param name="adjusted" type="boolean" truevalue="-a" falsevalue="" checked="false" label="Normalize using the number of reads of interquartile expression region"/>
160
161 <conditional name="optionSimplePara">
162 <param name="simplePara" type="select" label="provide the number of reads" >
163 <option value="Yes">Yes</option>
164 <option value="No" selected="true">No</option>
165 </param>
166 <when value="Yes">
167 <param name="paraValue" type="text" value="None" label="provide the number of reads" />
168 </when>
169 <when value="No">
170 </when>
171 </conditional>
172
173 <conditional name="optionFixedSizeFactor"> <!-- The selector only switches the option on; the factor itself is entered in FSFValue, so the descriptive label lives there. -->
174 <param name="FSF" type="select" label="Normalize using fixed size sliding windows in reference regions">
175 <option value="Yes">Yes</option>
176 <option value="No" selected="true">No</option>
177 </param>
178 <when value="Yes">
179 <param name="FSFValue" type="integer" value="0" label="Magnification factor for the normalization using fixed size sliding windows in reference regions" />
180 </when>
181 <when value="No">
182 </when>
183 </conditional>
184
185 <conditional name="optionFDR">
186 <param name="FDR" type="select" label="use FDR">
187 <option value="Yes">Yes</option>
188 <option value="No" selected="true">No</option>
189 </param>
190 <when value="Yes">
191 <param name="FDRValue" type="float" value="0.0" label="FDR value"/>
192 </when>
193 <when value="No">
194 </when>
195 </conditional>
196
197 </inputs>
198
199 <outputs> <!-- Single GFF3 dataset; the command template passes it to the script as the -o argument. -->
200 <data format="gff3" name="outputFileGff" label="[GetDifferentialExpression]out file"/>
201 </outputs>
202
203 <help>
204 This tool compares two sets of data and finds the differential expression. One very important component of the tool is the reference set. Actually, to use the tool, you need the two input sets of data, of course, and the reference set. The reference set is a set of genomic coordinates and, for each interval, the tool will count the number of features in each sample and compute the differential expression. For each reference interval, it will output the direction of the regulation (up or down, with respect to the first input set), and a *p*-value from a Fisher exact test.
205
206 This reference set seems boring. Why not compute the differential expression without this set? The answer is: the differential expression of what? I cannot guess it. Actually, you might want to compare the expression of genes, of small RNAs, of transposable elements, of anything... So the reference set can be a list of genes, and in this case, you can compute the differential expression of genes. But you can also compute many other things.
207
208 Suppose that you cluster the data of your two input samples (you can do it with the *clusterize* and the *mergeTranscriptLists* tools). You now have a list of all the regions which are transcribed in at least one of the input samples. This can be your reference set. This reference set is interesting since you can detect the differential expression of data which is outside any annotation.
209
210 Suppose now that you clusterize the two input samples using a sliding window (you can do it with the *clusterizeBySlidingWindows* and the *mergeSlidingWindowsClusters* tools). You can now select all the regions of a given size which contain at least one read in one of the two input samples (do it with *selectByTag* and the tag **nbElements**). Again, this can be another interesting reference set.
211
212 In most cases, the sizes of the two input samples will be different, so you should probably normalize the data, which is an available option. The ---rather crude--- normalization increases the number of data in the least populated sample and decreases the number of data in the most populated sample to the average number of data.
213 </help>
214 </tool>