comparison data_reader.xml @ 0:ccabef3f7d5f draft

Uploaded first version
author brenninc
date Sun, 08 May 2016 11:01:03 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ccabef3f7d5f
1 <tool id="directory_data_reader" name="Directory Data Finder" version="0.2">
2 <description>Reads a particular data type from a directory on the server</description>
3 <command interpreter="python">
4 <![CDATA[
5 directory_copier.py
## Data mode: select files with one ending, optionally filtered by the start/last
## of the file name, and map alternative endings onto the ending Galaxy should see.
6 #if $results.required=="data"
7 --ending .${results.extension.file_type}
8 --link
9 #if $results.start
## Free-text values are single-quoted so spaces or shell characters stay one argument.
10 --start '$results.start'
11 #end if
12 #if $results.last
13 --last '$results.last'
14 #end if
15 #if $results.extension.file_type=="fa"
16 --new_ending .fasta
17 #end if
18 #if $results.extension.file_type=="fq"
19 --new_ending .fastq
20 #end if
21 #if $results.extension.file_type=="text"
22 --new_ending .txt
23 #end if
24 #if $results.extension.file_type=="tsv"
25 --new_ending .tabular
26 #end if
27 #if $results.extension.file_type in ["fasta.gz"]
28 --decompress
29 --new_ending .fasta
30 #end if
31 #if $results.extension.file_type=="fastq"
32 --new_ending .${results.extension.new_galaxy.new_ending}
33 #end if
34 #if $results.extension.file_type=="fastq.gz"
35 --decompress
36 --new_ending .${results.extension.new_galaxy.new_ending}
37 #end if
38 #else
## Listing mode: pass every supported ending, each one only once.
39 --ending bam
40 --ending csv
41 --ending fa
42 --ending fasta
43 --ending fasta.gz
44 --ending fastq
45 --ending fastq.gz
47 --ending fq
48 --ending sam
49 --ending tabular
50 --ending text
51 --ending tsv
52 --ending txt
53 --ending xls
54 --ending xlsx
55 #end if
## Paths not starting with "/" are taken relative to the tool's own directory.
56 #if $directory.startswith('/')
57 --path '${directory}'
58 #else
59 --path '$__tool_directory__/${directory}'
60 #end if
61 --list '${listing}'
62 ]]>
63 </command>
64 <inputs>
65 <param name="directory" type="text" label="Directory to read data from." />
66 <param name="list_name" type="text" size="25" label="output name" value="input data"/>
67 <conditional name="results"> <!-- Chooses between fetching data plus a listing, or a listing only. -->
68 <param name="required" type="select" label="Download data or just directory listing" help="Select type of action required.">
69 <option value="data" selected="true">Data and listing of selected type</option>
70 <option value="listing">Get listing of selected file types </option>
71 </param>
72 <when value="data">
73 <param name="start" type="text" value="" label="String which must be at the start of each file name" />
74 <param name="last" type="text" value="" label="String which must be at the end of the file name (excluding the file type)" />
75 <conditional name="extension">
76 <param name="file_type" type="select" label="File Type" help="File Type."> <!-- One option per supported ending; each value has a matching when element below. -->
77 <option value="bam">*.bam files</option>
78 <option value="csv">*.csv files</option>
79 <option value="fa">*.fa files Files saved as *.fasta for galaxy</option>
80 <option value="fasta">*.fasta files</option>
81 <option value="fasta.gz">*.fasta.gz files</option>
82 <option value="fastq">*.fastq files</option>
83 <option value="fastq.gz">*.fastq.gz files</option>
86 <option value="fq">*.fq files Files saved as *.fastq for galaxy</option>
87 <option value="sam">*.sam files</option>
88 <option value="tabular">*.tabular Files</option>
89 <option value="text">*.text Files saved as *.txt for galaxy</option>
90 <option value="tsv">*.tsv files saved as *.tabular for galaxy</option>
91 <option value="txt">*.txt Files</option>
92 <option value="xls">*.xls files</option>
93 <option value="xlsx">*.xlsx files</option>
94 </param>
95 <when value="bam" />
96 <when value="csv" />
97 <when value="fa" />
98 <when value="fasta" />
99 <when value="fasta.gz" />
100 <when value="fastq" > <!-- fastq and fastq.gz additionally ask which fastq ending Galaxy should see. -->
101 <conditional name="new_galaxy">
102 <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used.">
103 <option value="fastq" selected="true">Keep data as general fastq format</option>
104 <option value="fastqsanger">Tag data as fastq sanger in galaxy</option>
105 <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option>
106 <option value="fastqillumina">Tag data as fastq illumina in galaxy</option>
107 </param>
108 <when value="fastq" />
109 <when value="fastqsanger" />
110 <when value="fastqsolexa" />
111 <when value="fastqillumina" />
112 </conditional>
113 </when>
114 <when value="fastq.gz" >
115 <conditional name="new_galaxy">
116 <param name="new_ending" type="select" label="Ending to be used for Galaxy" help="Will determine which down stream tools can be used.">
117 <option value="fastq" selected="true" >Keep data as general fastq format</option>
118 <option value="fastqsanger">Tag data as fastq sanger in galaxy</option>
119 <option value="fastqsolexa">Tag data as fastq solexa in galaxy</option>
120 <option value="fastqillumina">Tag data as fastq illumina in galaxy</option>
121 </param>
122 <when value="fastq" />
123 <when value="fastqsanger" />
124 <when value="fastqsolexa" />
125 <when value="fastqillumina" />
126 </conditional>
127 </when>
128 <when value="fq" />
129 <when value="sam" />
130 <when value="tabular" />
131 <when value="text" />
132 <when value="tsv" />
133 <when value="txt" />
134 <when value="xls" />
135 <when value="xlsx" />
136 </conditional>
137 </when>
138 <when value="listing">
139 </when>
140 </conditional>
141 </inputs>
142 <outputs>
143 <data format="txt" name="listing" label="List of files in $list_name"> <!-- Text listing; its path is passed to the script as the list argument of the command. -->
144 </data>
145 <!-- Ideally Galaxy can derive each dataset's type from its file extension (see the __designation_and_ext__ pattern below). If so, just add the type here. -->
146 <collection type="list" label="$list_name" name="data_collection">
147 <filter>(results['required'] == 'data')</filter> <!-- The collection is only produced in data mode, not in listing mode. -->
148 <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true" />
149 </collection>
150 </outputs>
151 <tests>
152 <test> <!-- Data mode (the default) with csv: expects sample1.csv in the listing and as a csv collection element. -->
153 <param name="directory" value="test-data" />
154 <param name="list_name" value="csv_files" />
155 <param name="results|extension|file_type" value="csv"/>
156 <output name="listing">
157 <assert_contents>
158 <has_line line="sample1.csv" />
159 </assert_contents>
160 </output>
161 <output_collection name="data_collection" type="list">
162 <element name="sample1" ftype="csv" file="sample1.csv" />
163 </output_collection>
164 </test>
165 <test> <!-- fa files are given the .fasta ending; each collection element is compared with its own reference file. NOTE(review): assumes test-data/other.fasta matches other.fa; confirm. -->
166 <param name="directory" value="test-data" />
167 <param name="results|extension|file_type" value="fa"/>
168 <output name="listing">
169 <assert_contents>
170 <has_line line="sample1.fasta" />
171 <has_line line="other.fasta" />
172 </assert_contents>
173 </output>
174 <output_collection name="data_collection" type="list">
175 <element name="sample1" ftype="fasta" file="sample1.fasta" />
176 <element name="other" ftype="fasta" file="other.fasta" />
177 </output_collection>
178 </test>
179 <test> <!-- fasta files filtered by start "sam": the listing must contain sample1.fasta and must not mention other.fasta. -->
180 <param name="directory" value="test-data" />
181 <param name="results|extension|file_type" value="fasta"/>
182 <param name="results|start" value="sam" />
183 <output name="listing">
184 <assert_contents>
185 <has_line line="sample1.fasta" />
186 <not_has_text text="other.fasta" />
187 </assert_contents>
188 </output>
189 <output_collection name="data_collection" type="list">
190 <element name="sample1" ftype="fasta" file="sample1.fasta" />
191 </output_collection>
192 </test>
193 <test> <!-- fasta.gz files filtered by last "le1": the listing is expected to show the file as sample1.fasta and not other.fasta. -->
194 <param name="directory" value="test-data" />
195 <param name="results|extension|file_type" value="fasta.gz"/>
196 <param name="results|last" value="le1" />
197 <output name="listing">
198 <assert_contents>
199 <has_line line="sample1.fasta" />
200 <not_has_text text="other.fasta" />
201 </assert_contents>
202 </output>
203 <output_collection name="data_collection" type="list">
204 <element name="sample1" ftype="fasta" file="sample1.fasta" />
205 </output_collection>
206 </test>
207 <test> <!-- fq files: the listing and the collection element are expected to use the fastq ending. -->
208 <param name="directory" value="test-data" />
209 <param name="results|extension|file_type" value="fq"/>
210 <output name="listing">
211 <assert_contents>
212 <has_line line="sample1.fastq" />
213 </assert_contents>
214 </output>
215 <output_collection name="data_collection" type="list">
216 <element name="sample1" ftype="fastq" file="sample1.fastq" />
217 </output_collection>
218 </test>
219 <test> <!-- fastq files with the default Galaxy ending (fastq). -->
220 <param name="directory" value="test-data" />
221 <param name="results|extension|file_type" value="fastq"/>
222 <output name="listing">
223 <assert_contents>
224 <has_line line="sample1.fastq" />
225 </assert_contents>
226 </output>
227 <output_collection name="data_collection" type="list">
228 <element name="sample1" ftype="fastq" file="sample1.fastq" />
229 </output_collection>
230 </test>
231 <test> <!-- fastq files tagged as fastqsanger: the listing shows the new ending and the element has ftype fastqsanger. -->
232 <param name="directory" value="test-data" />
233 <param name="results|extension|file_type" value="fastq"/>
234 <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/>
235 <output name="listing">
236 <assert_contents>
237 <has_line line="sample1.fastqsanger" />
238 </assert_contents>
239 </output>
240 <output_collection name="data_collection" type="list">
241 <element name="sample1" ftype="fastqsanger" file="sample1.fastq" />
242 </output_collection>
243 </test>
244 <test> <!-- fastq.gz files with the default Galaxy ending; line assertions are made on the listing output, the collection is checked separately. -->
245 <param name="directory" value="test-data" />
246 <param name="results|extension|file_type" value="fastq.gz"/>
247 <output name="listing">
248 <assert_contents>
249 <has_line line="sample1.fastq" />
250 </assert_contents>
251 </output>
252 <output_collection name="data_collection" type="list">
253 <element name="sample1" ftype="fastq" file="sample1.fastq" />
254 </output_collection>
255 </test>
256 <test> <!-- fastq.gz files tagged as fastqsanger; the asserted output must be the declared listing output. -->
257 <param name="directory" value="test-data" />
258 <param name="results|extension|file_type" value="fastq.gz"/>
259 <param name="results|extension|new_galaxy|new_ending" value="fastqsanger"/>
260 <output name="listing">
261 <assert_contents>
262 <has_line line="sample1.fastqsanger" />
263 </assert_contents>
264 </output>
265 <output_collection name="data_collection" type="list">
266 <element name="sample1" ftype="fastqsanger" file="sample1.fastq" />
267 </output_collection>
268 </test>
269 <test> <!-- sam files: expects sample1.sam in the listing and as a sam collection element. -->
270 <param name="directory" value="test-data" />
271 <param name="results|extension|file_type" value="sam"/>
272 <output name="listing">
273 <assert_contents>
274 <has_line line="sample1.sam" />
275 </assert_contents>
276 </output>
277 <output_collection name="data_collection" type="list">
278 <element name="sample1" ftype="sam" file="sample1.sam" />
279 </output_collection>
280 </test>
281 <test> <!-- tabular files: expects sample1.tabular in the listing and as a tabular collection element. -->
282 <param name="directory" value="test-data" />
283 <param name="results|extension|file_type" value="tabular"/>
284 <output name="listing">
285 <assert_contents>
286 <has_line line="sample1.tabular" />
287 </assert_contents>
288 </output>
289 <output_collection name="data_collection" type="list">
290 <element name="sample1" ftype="tabular" file="sample1.tabular" />
291 </output_collection>
292 </test>
293 <test> <!-- text files are given the .txt ending; the asserted output must be the declared listing output. -->
294 <param name="directory" value="test-data" />
295 <param name="results|extension|file_type" value="text"/>
296 <output name="listing">
297 <assert_contents>
298 <has_line line="sample1.txt" />
299 </assert_contents>
300 </output>
301 <output_collection name="data_collection" type="list">
302 <element name="sample1" ftype="txt" file="sample1.text" />
303 </output_collection>
304 </test>
305 <test> <!-- tsv files are given the .tabular ending; line assertions are made on the listing output, the collection is checked separately. -->
306 <param name="directory" value="test-data" />
307 <param name="results|extension|file_type" value="tsv"/>
308 <output name="listing">
309 <assert_contents>
310 <has_line line="sample1.tabular" />
311 </assert_contents>
312 </output>
313 <output_collection name="data_collection" type="list">
314 <element name="sample1" ftype="tabular" file="sample1.tsv" />
315 </output_collection>
316 </test>
317 <test> <!-- txt files: the listing must contain sample1.txt; the collection is expected to hold sample1 and sample2. -->
318 <param name="directory" value="test-data" />
319 <param name="results|extension|file_type" value="txt" />
320 <output name="listing">
321 <assert_contents>
322 <has_line line="sample1.txt" />
323 </assert_contents>
324 </output>
325 <output_collection name="data_collection" type="list">
326 <element name="sample1" ftype="txt" file="sample1.txt" />
327 <element name="sample2" ftype="txt" file="sample2.txt" />
328 </output_collection>
329 </test>
330 <test> <!-- xls files: expects sample1.xls in the listing and as an xls collection element. -->
331 <param name="directory" value="test-data" />
332 <param name="results|extension|file_type" value="xls"/>
333 <output name="listing">
334 <assert_contents>
335 <has_line line="sample1.xls" />
336 </assert_contents>
337 </output>
338 <output_collection name="data_collection" type="list">
339 <element name="sample1" ftype="xls" file="sample1.xls" />
340 </output_collection>
341 </test>
342 <test> <!-- xlsx files: expects sample1.xlsx in the listing and as an xlsx collection element. -->
343 <param name="directory" value="test-data" />
344 <param name="results|extension|file_type" value="xlsx"/>
345 <output name="listing">
346 <assert_contents>
347 <has_line line="sample1.xlsx" />
348 </assert_contents>
349 </output>
350 <output_collection name="data_collection" type="list">
351 <element name="sample1" ftype="xlsx" file="sample1.xlsx" />
352 </output_collection>
353 </test>
354 <test> <!-- Listing mode: no collection is produced; the listing output must show the files with their original endings. -->
355 <param name="directory" value="test-data" />
356 <param name="results|required" value="listing"/>
357 <output name="listing">
358 <assert_contents>
359 <has_line line="sample1.csv" />
360 <has_line line="sample1.fasta" />
361 <has_line line="sample1.fasta.gz" />
362 <has_line line="sample1.fastq" />
363 <has_line line="sample1.fastq.gz" />
364 <has_line line="sample1.sam" />
365 <has_line line="sample1.tabular" />
366 <has_line line="sample1.text" />
367 <has_line line="sample1.tsv" />
368 <has_line line="sample1.txt" />
369 <has_line line="sample1.xls" />
370 <has_line line="sample1.xlsx" />
371 </assert_contents>
372 </output>
373 </test>
374
375 </tests>
376
377 <help>
378 <![CDATA[
379 This tool will lookup files on the Galaxy server machine, including mounted directories.
380
381 Only directories that are included in the white list and not in the black list are allowed.
382 If the directory you require does not pass the white list or blacklist test please contact the local galaxy admin.
383 (Admins see README)
384
385 This tool only supports a limited set of types and file extensions.
386 No other files are ever returned, either as data or in the listing.
387 These endings are case sensitive.
388
389 ====
390
391 The data options will look for all files that have a particular ending in the selected directory.
392
393 The tool will return two things.
394
395 1. A Dataset collection of all the detected files.
396
397 2. A file with the names of all the detected files. These will be sorted in the same order as galaxy builds the dataset collection.
398
399 The files can be filtered by setting a specific start string for the file name.
400 Only files that start with this string (case sensitive) will be included.
401
402 Files can also be filtered by the last part of the file name before the file extension.
403
404 Assuming the directory has:
405 C01_R1_001.fasta C01_R2_001.fasta C02_R1_001.fasta C02_R2_001.fasta
406
407 Setting start C01 will return just the C01 files: C01_R1_001.fasta C01_R2_001.fasta
408
409 Setting last R1_001 will return the read1 files: C01_R1_001.fasta C02_R1_001.fasta
410
411 As Galaxy detects the file type based on the extension, this tool will change the extension for supported alternative file endings.
412 This includes (manually) setting the extension to fastqsanger, fastqsolexa, or fastqillumina for tools that specify one of these.
413
414 This tool will unzip gz files.
415
416 ====
417
418 The listing option will return a txt file with all the files found with any of the supported endings. Other files in that directory are not included.
419
420 The file extensions are left as in the directory.
421
422 File start and last filters are not supported in this mode.
423 ]]>
424 </help>
425 <citations>
426 </citations>
427
428 </tool>