Mercurial > repos > crs4 > hadoop_galaxy
comparison put_dataset.xml @ 1:30bd2584b6a0 draft default tip
Uploaded
author | crs4 |
---|---|
date | Wed, 15 Oct 2014 09:39:16 -0400 |
parents | 7698311d4466 |
children |
comparison
equal
deleted
inserted
replaced
0:7698311d4466 | 1:30bd2584b6a0 |
---|---|
1 <tool id="hadoop_galaxy_put_dataset" name="Put dataset" version="0.1.0"> | |
2 <description>Copy data from Galaxy storage to Hadoop storage.</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.11">pydoop</requirement> | |
5 <requirement type="package" version="0.1.1">hadoop-galaxy</requirement> | |
6 </requirements> | |
7 <!-- Command template for put_dataset: optional flags are emitted only when the matching input is set. The workspace value is converted with str() before the comparison so that an empty text field is detected reliably. --> | |
8 <command> | |
9 put_dataset | |
10 #if str($workspace) != "" | |
11 --hadoop-workspace "$workspace" | |
12 #end if | |
13 #if $use_distcp | |
14 --distcp | |
15 #end if | |
16 "$input_pathset" "$output_path" | |
17 </command> | |
18 <!-- Inputs: the source pathset, an optional workspace path, and a toggle for distcp. --> | |
19 <inputs> | |
20 <param name="input_pathset" type="data" format="pathset" label="Galaxy pathset" /> | |
21 | |
22 <param name="workspace" type="text" label="Path to workspace for Hadoop data" | |
23 help="The data will be copied to a new directory under this path. The value can also be set through the HADOOP_GALAXY_PUT_DIR environment variable." /> | |
24 | |
25 <param name="use_distcp" type="boolean" checked="false" label="Use Hadoop distcp2" | |
26 help="Use distcp2 if Hadoop can access Galaxy's storage space and you're copying a large dataset." /> | |
27 </inputs> | |
28 <!-- Output: a single pathset dataset whose label is derived from the input pathset's name. --> | |
29 <outputs> | |
30 <data name="output_path" format="pathset" label="Hadoop pathset from $input_pathset.name" /> | |
31 </outputs> | |
32 <!-- Any exit code of 1 or greater is reported as a fatal error. --> | |
33 <stdio> | |
34 <exit_code range="1:" level="fatal" /> | |
35 </stdio> | |
36 | |
37 <help> | |
38 This tool copies data from Galaxy's storage to storage that is suitable for | |
39 Hadoop jobs. An example of a use case may be to copy data from the Galaxy server | |
40 to HDFS. Whether this tool is required depends on your specific local setup. | |
41 </help> | |
42 | |
43 </tool> |