# HG changeset patch
# User crs4
# Date 1413380356 14400
# Node ID 30bd2584b6a09315c6d10f526e8eb4405d789697
# Parent 7698311d4466c6a2a1e500e2663eabf3cc17eb99
Uploaded
diff -r 7698311d4466 -r 30bd2584b6a0 cat_paths.xml
--- a/cat_paths.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-
- Concatenate all components of a pathset into a single file.
-
- pydoop
- hadoop-galaxy
-
-
-
- #if $use_hadoop
- dist_cat_paths
- #else
- cat_paths
- #end if
- #if $delete_source
- --delete-source
- #end if
- $input_pathset $output_path
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Datasets represented as pathsets can be split in a number of files.
-This tool takes all of them and concatenates them into a single output file.
-
-In your workflow, you'll need to explicitly set the appropriate data format on the
-output dataset with an Action to "Change Datatype".
-
-"Delete remote input data" option
-====================================
-With this option, after the data has been concated into the new Galaxy dataset,
-the original files that were referenced by the pathset are deleted. This effectively
-tells the action to "move" the data instead of a "copying" it and helps
-avoid amassing intermediate data in your Hadoop workspace.
-
-
-"Use Hadoop-based program" option
-====================================
-
-With this option you will use your entire Hadoop cluster to simultaneously write
-multiple parts of the final file. For this to be possible, the Hadoop nodes
-must be able to access the Galaxy file space directly. In addition, to achieve
-reasonable results the Galaxy workspace should on a parallel shared file system.
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 datatypes_conf.xml
--- a/datatypes_conf.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-
-
-
-
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 dist_text_zipper.xml
--- a/dist_text_zipper.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-
- Compress lots of text files on Hadoop
-
- pydoop
- hadoop-galaxy
-
-
-
- hadoop_galaxy
- --input $input_data
- --output $output
- --executable dist_text_zipper
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This is a Pydoop-based distributed text file compression program.
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/cat_paths.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/cat_paths.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,62 @@
+
+ Concatenate all components of a pathset into a single file.
+
+ pydoop
+ hadoop-galaxy
+
+
+
+ #if $use_hadoop
+ dist_cat_paths
+ #else
+ cat_paths
+ #end if
+ #if $delete_source
+ --delete-source
+ #end if
+ $input_pathset $output_path
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Datasets represented as pathsets can be split in a number of files.
+This tool takes all of them and concatenates them into a single output file.
+
+In your workflow, you'll need to explicitly set the appropriate data format on the
+output dataset with an Action to "Change Datatype".
+
+"Delete remote input data" option
+====================================
+With this option, after the data has been concatenated into the new Galaxy dataset,
+the original files that were referenced by the pathset are deleted. This effectively
+tells the action to "move" the data instead of copying it and helps
+avoid amassing intermediate data in your Hadoop workspace.
+
+
+"Use Hadoop-based program" option
+====================================
+
+With this option you will use your entire Hadoop cluster to simultaneously write
+multiple parts of the final file. For this to be possible, the Hadoop nodes
+must be able to access the Galaxy file space directly. In addition, to achieve
+reasonable results the Galaxy workspace should be on a parallel shared file system.
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/datatypes_conf.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/dist_text_zipper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/dist_text_zipper.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,30 @@
+
+ Compress lots of text files on Hadoop
+
+ pydoop
+ hadoop-galaxy
+
+
+
+ hadoop_galaxy
+ --input $input_data
+ --output $output
+ --executable dist_text_zipper
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This is a Pydoop-based distributed text file compression program.
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/make_pathset.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/make_pathset.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,58 @@
+
+ Create a pathset for a set of files
+
+ pydoop
+ hadoop-galaxy
+
+
+
+ make_pathset
+ #if str($paths.source) == 'tool_input'
+ --force-local --data-format $paths.datapaths.ext "$output_path" "$paths.datapaths"
+ #elif str($paths.source) == 'text_box'
+ #if str($paths.filesystem_select) == "local_fs"
+ --force-local
+ #end if
+ #if $paths.data_format
+ --data-format "$paths.data_format"
+ #end if
+ "$output_path" "$paths.datapaths"
+ #else
+ #raise ValueError("BUG!! unknown paths.source value")
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Create a pathset for a set of files to be used as input for Hadoop tools.
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/put_dataset.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/put_dataset.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,43 @@
+
+ Copy data from Galaxy storage to Hadoop storage.
+
+ pydoop
+ hadoop-galaxy
+
+
+
+ put_dataset
+ #if $workspace != ""
+ --hadoop-workspace "$workspace"
+ #end if
+ #if $use_distcp
+ --distcp
+ #end if
+ "$input_pathset" "$output_path"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This tool copies data from Galaxy's storage to storage that is suitable for
+ Hadoop jobs. An example of a use case may be to copy data from the Galaxy server
+ to HDFS. Whether this tool is required depends on your specific local setup.
+
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/split_pathset.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/split_pathset.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,60 @@
+
+ Split a pathset according to a regular expression criteria
+
+ pydoop
+ hadoop-galaxy
+
+
+
+ split_pathset '$criteria_expr'
+ #if $anchor_end
+ --anchor-end
+ #end if
+ --expand-levels $expand_levels
+ $input_pathset $output_true $output_false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Splits a pathset according to a regular expression.
+
+ You can have the tool expand the paths in the pathset by a certain number
+ of levels prior to testing whether it matches the regular expression.
+
+
+ **Note**: you can't use '$' in your regular expression. To anchor the
+ expression to the end of the path use the checkbox.
+
+
+ *Note*: the regular expression must match the path from its beginning.
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 hadoop_galaxy-13348e73/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/tool_dependencies.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,27 @@
+
+
+
+
+
+
+
+
+ git clone https://github.com/crs4/hadoop-galaxy/
+ git reset --hard 0.1.4
+
+
+
+
+
+ $INSTALL_DIR/lib/python
+ export PYTHONPATH=$INSTALL_DIR/lib/python:$PYTHONPATH && python setup.py install --prefix=$INSTALL_DIR --install-lib=$INSTALL_DIR/lib/python
+
+ $INSTALL_DIR/bin
+ $INSTALL_DIR/lib/python
+
+
+
+
+
+
+
diff -r 7698311d4466 -r 30bd2584b6a0 make_pathset.xml
--- a/make_pathset.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-
- Create a pathset for a set of files
-
- pydoop
- hadoop-galaxy
-
-
-
- make_pathset
- #if str($paths.source) == 'tool_input'
- --force-local --data-format $paths.datapaths.ext "$output_path" "$paths.datapaths"
- #elif str($paths.source) == 'text_box'
- #if str($paths.filesystem_select) == "local_fs"
- --force-local
- #end if
- #if $paths.data_format
- --data-format "$paths.data_format"
- #end if
- "$output_path" "$paths.datapaths"
- #else
- #raise ValueError("BUG!! unknown paths.source value")
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Create a pathset for a set of files to be used as input for Hadoop tools.
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 put_dataset.xml
--- a/put_dataset.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-
- Copy data from Galaxy storage to Hadoop storage.
-
- pydoop
- hadoop-galaxy
-
-
-
- put_dataset
- #if $workspace != ""
- --hadoop-workspace "$workspace"
- #end if
- #if $use_distcp
- --distcp
- #end if
- "$input_pathset" "$output_path"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- This tools copies data from Galaxy's storage to storage that is suitable for
- Hadoop jobs. An example of a use case may be to copy data from the Galaxy server
- to HDFS. Whether this tool is required depends on your specific local setup.
-
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 split_pathset.xml
--- a/split_pathset.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-
- Split a pathset according to a regular expression criteria
-
- pydoop
- hadoop-galaxy
-
-
-
- split_pathset '$criteria_expr'
- #if $anchor_end
- --anchor-end
- #end if
- --expand-levels $expand_levels
- $input_pathset $output_true $output_false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Splits a pathset according to a regular expression.
-
- You can have the tool expand the paths in the pathset by a certain number
- of levels prior to testing whether it matches the regular expression.
-
-
- **Note**: you can't use '$' in your regular expression. To anchor the
- expression to the end of the path use the checkbox.
-
-
- *Note*: the regular expression must match the path from its beginning.
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 tool_conf.xml
--- a/tool_conf.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff -r 7698311d4466 -r 30bd2584b6a0 tool_dependencies.xml
--- a/tool_dependencies.xml Fri May 30 06:48:47 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-
-
-
-
-
-
-
-
- git clone https://github.com/crs4/hadoop-galaxy/
- git reset --hard 0.1.1
-
-
-
-
-
- $INSTALL_DIR/lib/python
- export PYTHONPATH=$INSTALL_DIR/lib/python:$PYTHONPATH && python setup.py install --prefix=$INSTALL_DIR --install-lib=$INSTALL_DIR/lib/python
-
- $INSTALL_DIR/bin
- $INSTALL_DIR/lib/python
-
-
-
-
-
-
-