comparison SMART/galaxy/compareOverlapping.xml @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents
children
comparison
equal deleted inserted replaced
37:d22fadc825e3 38:2c0c0a89fad7
1 <tool id="CompareOverlapping" name="compare overlapping">
2 <description>Print all the transcripts from a first file which overlap with the transcripts from a second file.</description>
<!-- Declares one requirement of type "set_environment" named PYTHONPATH. NOTE(review): presumably this lets the script import the SMART Python modules; confirm against the Galaxy deployment. -->
3 <requirements>
4 <requirement type="set_environment">PYTHONPATH</requirement>
5 </requirements>
<!--
  Cheetah template that builds the CompareOverlapping.py command line.
    -i / -f : first (query) dataset and its format
    -j / -g : second (reference) dataset and its format
    -o      : output dataset
  Dataset paths are single-quoted so that a path containing whitespace or
  shell metacharacters still reaches the script as one argument.
  Each optional flag (-S -s -U -u -E -e -N -n -d -m) is emitted only when its
  conditional selector is "Yes"; -c / -a follow the Colinear / AntiSens choice;
  the three boolean parameters expand to their flag or to an empty string.
-->
6 <command interpreter="python">
7 ../Java/Python/CompareOverlapping.py -i '$formatType.inputFileName1'
8 #if $formatType.FormatInputFileName1 == 'bed':
9 -f bed
10 #elif $formatType.FormatInputFileName1 == 'gff':
11 -f gff
12 #elif $formatType.FormatInputFileName1 == 'gff2':
13 -f gff2
14 #elif $formatType.FormatInputFileName1 == 'gff3':
15 -f gff3
16 #elif $formatType.FormatInputFileName1 == 'sam':
17 -f sam
18 #elif $formatType.FormatInputFileName1 == 'gtf':
19 -f gtf
20 #end if
21
22 -j '$formatType2.inputFileName2'
23 #if $formatType2.FormatInputFileName2 == 'bed':
24 -g bed
25 #elif $formatType2.FormatInputFileName2 == 'gff':
26 -g gff
27 #elif $formatType2.FormatInputFileName2 == 'gff2':
28 -g gff2
29 #elif $formatType2.FormatInputFileName2 == 'gff3':
30 -g gff3
31 #elif $formatType2.FormatInputFileName2 == 'sam':
32 -g sam
33 #elif $formatType2.FormatInputFileName2 == 'gtf':
34 -g gtf
35 #end if
36
37 -o '$outputFileGff'
38
39 #if $optionNFirstFile1.NFirstForFile1 == 'Yes':
40 -S $optionNFirstFile1.firstNtFile1
41 #end if
42 #if $optionNFirstFile2.NFirstForFile2 == 'Yes':
43 -s $optionNFirstFile2.firstNtFile2
44 #end if
45 #if $optionNLastFile1.NLastForFile1 == 'Yes':
46 -U $optionNLastFile1.lastNtFile1
47 #end if
48 #if $optionNLastFile2.NLastForFile2 == 'Yes':
49 -u $optionNLastFile2.lastNtFile2
50 #end if
51
52 #if $optionExtentionCinqFile1.extentionFile1 == 'Yes':
53 -E $optionExtentionCinqFile1.extention51
54 #end if
55 #if $optionExtentionCinqFile2.extentionFile2 == 'Yes':
56 -e $optionExtentionCinqFile2.extention52
57 #end if
58
59 #if $optionExtentionTroisFile1.extentionFile1 == 'Yes':
60 -N $optionExtentionTroisFile1.extention31
61 #end if
62 #if $optionExtentionTroisFile2.extentionFile2 == 'Yes':
63 -n $optionExtentionTroisFile2.extention32
64 #end if
65
66 #if $OptionColinearOrAntiSens.OptionCA == 'Colinear':
67 -c
68 #elif $OptionColinearOrAntiSens.OptionCA == 'AntiSens':
69 -a
70 #end if
71
72 #if $OptionDistance.Dist == 'Yes':
73 -d $OptionDistance.distance
74 #end if
75
76 #if $OptionMinOverlap.MO == 'Yes':
77 -m $OptionMinOverlap.minOverlap
78 #end if
79
80 $InvertMatch
81 $ReportIntron
82 $NotOverlapping
83
84 </command>
85
86 <inputs>
<!-- Format selector for the first input. The selected value picks the matching <when> branch; every branch exposes the same data parameter (inputFileName1), restricted to the chosen format. The command template reads formatType.FormatInputFileName1 (passed as -f) and formatType.inputFileName1 (passed as -i). -->
87 <conditional name="formatType">
88 <param name="FormatInputFileName1" type="select" label="Input File Format 1">
89 <option value="bed">bed</option>
90 <option value="gff">gff</option>
91 <option value="gff2">gff2</option>
92 <option value="gff3">gff3</option>
93 <option value="sam">sam</option>
94 <option value="gtf">gtf</option>
95 </param>
96 <when value="bed">
97 <param name="inputFileName1" format="bed" type="data" label="Input File 1"/>
98 </when>
99 <when value="gff">
100 <param name="inputFileName1" format="gff" type="data" label="Input File 1"/>
101 </when>
102 <when value="gff2">
103 <param name="inputFileName1" format="gff2" type="data" label="Input File 1"/>
104 </when>
105 <when value="gff3">
106 <param name="inputFileName1" format="gff3" type="data" label="Input File 1"/>
107 </when>
108 <when value="sam">
109 <param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
110 </when>
111 <when value="gtf">
112 <param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
113 </when>
114 </conditional>
115
<!-- Format selector for the second input. The selected value picks the matching <when> branch; every branch exposes the same data parameter (inputFileName2), restricted to the chosen format. The command template reads formatType2.FormatInputFileName2 (passed as -g) and formatType2.inputFileName2 (passed as -j). -->
116 <conditional name="formatType2">
117 <param name="FormatInputFileName2" type="select" label="Input File Format 2">
118 <option value="bed">bed</option>
119 <option value="gff">gff</option>
120 <option value="gff2">gff2</option>
121 <option value="gff3">gff3</option>
122 <option value="sam">sam</option>
123 <option value="gtf">gtf</option>
124 </param>
125 <when value="bed">
126 <param name="inputFileName2" format="bed" type="data" label="Input File 2"/>
127 </when>
128 <when value="gff">
129 <param name="inputFileName2" format="gff" type="data" label="Input File 2"/>
130 </when>
131 <when value="gff2">
132 <param name="inputFileName2" format="gff2" type="data" label="Input File 2"/>
133 </when>
134 <when value="gff3">
135 <param name="inputFileName2" format="gff3" type="data" label="Input File 2"/>
136 </when>
137 <when value="sam">
138 <param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
139 </when>
140 <when value="gtf">
141 <param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
142 </when>
143 </conditional>
144
<!-- Optional shrinking of the queries to their first N nt (default: No). Choosing Yes reveals the integer firstNtFile1 (default 1), which the command template passes as -S; the No branch adds no parameter. -->
145 <conditional name="optionNFirstFile1">
146 <param name="NFirstForFile1" type="select" label="Shrink the queries to their first N nt.">
147 <option value="Yes">Yes</option>
148 <option value="No" selected="true">No</option>
149 </param>
150 <when value="Yes">
151 <param name="firstNtFile1" type="integer" value="1" label="size" />
152 </when>
153 <when value="No">
154 </when>
155 </conditional>
<!-- Optional shrinking of the references to their first N nt (default: No). Choosing Yes reveals the integer firstNtFile2 (default 1), which the command template passes as -s; the No branch adds no parameter. -->
156 <conditional name="optionNFirstFile2">
157 <param name="NFirstForFile2" type="select" label="Shrink the references to their first N nt.">
158 <option value="Yes">Yes</option>
159 <option value="No" selected="true">No</option>
160 </param>
161 <when value="Yes">
162 <param name="firstNtFile2" type="integer" value="1" label="size" />
163 </when>
164 <when value="No">
165 </when>
166 </conditional>
167
<!-- Optional shrinking of the queries to their last N nt (default: No). Choosing Yes reveals the integer lastNtFile1 (default 1), which the command template passes as -U; the No branch adds no parameter. -->
168 <conditional name="optionNLastFile1">
169 <param name="NLastForFile1" type="select" label="Shrink the queries to their last N nt.">
170 <option value="Yes">Yes</option>
171 <option value="No" selected="true">No</option>
172 </param>
173 <when value="Yes">
174 <param name="lastNtFile1" type="integer" value="1" label="size"/>
175 </when>
176 <when value="No">
177 </when>
178 </conditional>
<!-- Optional shrinking of the references to their last N nt (default: No). Choosing Yes reveals the integer lastNtFile2 (default 1), which the command template passes as -u; the No branch adds no parameter. -->
179 <conditional name="optionNLastFile2">
180 <param name="NLastForFile2" type="select" label="Shrink the references to their last N nt.">
181 <option value="Yes">Yes</option>
182 <option value="No" selected="true">No</option>
183 </param>
184 <when value="Yes">
185 <param name="lastNtFile2" type="integer" value="1" label="size"/>
186 </when>
187 <when value="No">
188 </when>
189 </conditional>
190
<!-- Optional extension of the query features towards the 5' end ("Cinq" is French for "five"); default: No. Choosing Yes reveals the integer extention51 (default 1), which the command template passes as -E. The selector name extentionFile1 is also used inside optionExtentionTroisFile1; the two are distinguished by their enclosing conditional. -->
191 <conditional name="optionExtentionCinqFile1">
192 <param name="extentionFile1" type="select" label="Extend the query features towards the 5' end">
193 <option value="Yes">Yes</option>
194 <option value="No" selected="true">No</option>
195 </param>
196 <when value="Yes">
197 <param name="extention51" type="integer" value="1" label="in file 1" />
198 </when>
199 <when value="No">
200 </when>
201 </conditional>
202
<!-- Optional extension of the reference features towards the 5' end ("Cinq" is French for "five"); default: No. Choosing Yes reveals the integer extention52 (default 1), which the command template passes as -e. The selector name extentionFile2 is also used inside optionExtentionTroisFile2; the two are distinguished by their enclosing conditional. -->
203 <conditional name="optionExtentionCinqFile2">
204 <param name="extentionFile2" type="select" label="Extend the reference features towards 5' end">
205 <option value="Yes">Yes</option>
206 <option value="No" selected="true">No</option>
207 </param>
208 <when value="Yes">
209 <param name="extention52" type="integer" value="1" label="in file 2"/>
210 </when>
211 <when value="No">
212 </when>
213 </conditional>
214
<!-- Optional extension of the query features towards the 3' end ("Trois" is French for "three"); default: No. Choosing Yes reveals the integer extention31 (default 1), which the command template passes as -N. -->
215 <conditional name="optionExtentionTroisFile1">
216 <param name="extentionFile1" type="select" label="Extend the query features towards 3' end">
217 <option value="Yes">Yes</option>
218 <option value="No" selected="true">No</option>
219 </param>
220 <when value="Yes">
221 <param name="extention31" type="integer" value="1" label="in file 1" />
222 </when>
223 <when value="No">
224 </when>
225 </conditional>
226
<!-- Optional extension of the reference features towards the 3' end ("Trois" is French for "three"); default: No. Choosing Yes reveals the integer extention32 (default 1), which the command template passes as -n. -->
227 <conditional name="optionExtentionTroisFile2">
228 <param name="extentionFile2" type="select" label="Extend the reference features towards 3' end">
229 <option value="Yes">Yes</option>
230 <option value="No" selected="true">No</option>
231 </param>
232 <when value="Yes">
233 <param name="extention32" type="integer" value="1" label="in file 2" />
234 </when>
235 <when value="No">
236 </when>
237 </conditional>
238
<!-- Strand-orientation filter with three choices and NONE as the default. The command template emits -c for Colinear, -a for AntiSens, and nothing for NONE. All three <when> branches are empty because no extra parameter is needed. -->
239 <conditional name="OptionColinearOrAntiSens">
240 <param name="OptionCA" type="select" label="Report queries which are collinear/antisens w.r.t. a reference">
241 <option value="Colinear">Colinear</option>
242 <option value="AntiSens">AntiSens</option>
243 <option value="NONE" selected="true">NONE</option>
244 </param>
245 <when value="Colinear">
246 </when>
247 <when value="AntiSens">
248 </when>
249 <when value="NONE">
250 </when>
251 </conditional>
252
<!-- Optional maximum-distance setting (default: No). Choosing Yes reveals the integer "distance" (default 0), which the command template passes as -d. The integer parameter now carries a label so the form field is not left unnamed. -->
253 <conditional name="OptionDistance">
254 <param name="Dist" type="select" label="Maximum Distance between two reads">
255 <option value="Yes">Yes</option>
256 <option value="No" selected="true">No</option>
257 </param>
258 <when value="Yes">
259 <param name="distance" type="integer" value="0" label="Maximum distance (in nucleotides)"/>
260 </when>
261 <when value="No">
262 </when>
263 </conditional>
264
<!-- Optional minimum-overlap setting (default: No). Choosing Yes reveals the integer "minOverlap" (default 1), which the command template passes as -m. The integer parameter now carries a label so the form field is not left unnamed. -->
265 <conditional name="OptionMinOverlap">
266 <param name="MO" type="select" label="Minimum number of overlapping between two reads">
267 <option value="Yes">Yes</option>
268 <option value="No" selected="true">No</option>
269 </param>
270 <when value="Yes">
271 <param name="minOverlap" type="integer" value="1" label="Minimum overlap"/>
272 </when>
273 <when value="No">
274 </when>
275 </conditional>
<!-- Three unchecked-by-default boolean switches. Each expands in the command template to its flag when checked (ReportIntron: -t, InvertMatch: -x, NotOverlapping: -O) and to an empty string otherwise. -->
276 <param name="ReportIntron" type="boolean" truevalue="-t" falsevalue="" checked="false" label="Also report queries which overlap with the introns of references, or queries such that a reference is in one of its intron"/>
277 <param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
278 <param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
279 </inputs>
280
<!-- Single output dataset, declared with format gff3; the command template hands it to the script through -o. -->
281 <outputs>
282 <data name="outputFileGff" format="gff3"/>
283 </outputs>
284
285 <help>
286 This script may be the most important one. It basically compares two sets of transcripts and keeps those from the first set which overlap with the second one. The first set is considered as the query set (basically, your data) and the second one is the reference set (RefSeq data, for example).
287
288 It is vital to understand that it will output the elements of the first file which overlap with the elements of the second one.
289
290 Various modifiers are also available:
291
292 - Restrict query / reference set to the first nucleotide. Useful to check if the TSS of one set overlaps with the other one.
293
294 - Extend query / reference set in the 5' / 3' direction. Useful to check if one set is located upstream / downstream of the other one.
295
296 - Include introns in the comparison.
297
298 - Invert selection (report those which do not overlap).
299
300 - Restrict to colinear / anti-sense overlapping data.
301
302 - Keep the query data even if they do not strictly overlap with the reference data, but are located no further away than *n* nucleotides from some reference data.
303
304 - Keep the query data which are strictly included in reference data, meaning that a query transcript such that at least 1 nucleotide does not overlap with reference data will not be presented as a solution.
305
306 The mechanism of shrinking and extending is also useful to make a fine-grained comparison. For example, if you want to keep those such that the TSS is overlapping the reference set, you just shrink the query set to 1 nucleotide. Now, if you want to keep those which are overlapping your data or located 2kb downstream of it, just extend the query data in the downstream direction, and you will have what you want. You can also extend in the opposite direction to get the possible transcription factor sites which are upstream.
307
308 One option reverses the selection. In other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
309 </help>
310 </tool>