# HG changeset patch
# User edward-kirton
# Date 1322790096 18000
# Node ID 9974ff5df008526c4fcbc1452c1be9a9aae80e0e
Uploaded
diff -r 000000000000 -r 9974ff5df008 data_nfs/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/README Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,14 @@
+There are four tools contained herein, but only two are required.
+
+The recommended way to import data into galaxy is by using galaxy_import.pl from the command-line;
+the galaxy user must also run the gcpd.pl script periodically (e.g. every 2 min via cron).
+
+The older tool for copying/symlinking to import is included but not recommended, as it requires the user
+to manually edit the metadata for each file imported.
+
+The recommended way to export data is to, again, use the FTP method (cp_ftp tool).
+
+The older tool to export to NFS path is included if you prefer it.
+
+Author/Support: ESKirton@LBL.gov
+License: Copyright(c) 2011 US DOE Joint Genome Institute; use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/cp_ftp.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/cp_ftp.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,40 @@
#!/usr/bin/perl

# Copy Galaxy datasets into a user's FTP folder so the user can download
# them with an FTP client.  Invoked by the cp_ftp Galaxy tool wrapper.
#
# ARGS: <ftp_dir> <user_email> <logfile> <src1> <name1> [<src2> <name2> ...]

use strict;
use warnings;
use File::Copy;
use File::Basename;

# SETTINGS
my $autocreate = 1;    # create the per-user FTP folder on first use

# VALIDATE INPUT
my ($ftp_dir, $user, $logfile, @files) = @ARGV;
die("This galaxy instance does not have FTP enabled\n") unless $ftp_dir;    # i.e. in universe_wsgi.ini
die("FTP dir, $ftp_dir, does not exist!\n") unless -d $ftp_dir;
# $user becomes a path component under $ftp_dir; reject anything that could
# escape it (the old check only looked at the FIRST character, so e.g.
# "a/../../etc" slipped through).
die("Invalid email address: $user\n")
    if !defined($user)
    or $user eq ''
    or $user =~ /\//
    or $user =~ /\.\./
    or $user =~ /^\./;
my $dest = "$ftp_dir/$user";
unless (-d $dest) {
    if ($autocreate) {
        mkdir($dest) or die("Unable to mkdir, $dest: $!\n");
        chmod 0775, $dest or die("Unable to chmod $dest: $!\n");
    } else {
        die("User $user does not have an FTP folder\n");
    }
}

# COPY FILES, WRITE TO LOG
# @files is a flat list of (source path, destination name) pairs.
open(my $log, '>', $logfile) or die("Unable to open logfile, $logfile: $!\n");
while (@files) {
    my $file = shift @files or die("Source filename required\n");
    my $name = shift @files or die("Destination filename required\n");
    die("Source file, $file, does not exist\n") unless -e $file;
    copy($file, "$dest/$name") or die("Unable to copy $file: $!\n");
    print $log "Copied $name\n";
}
close($log) or die("Unable to close logfile: $!\n");
print "Please delete the files from your FTP folder after downloading.\n";
exit;
__END__
Copyright (c) 2011 US DOE Joint Genome Institute.
Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/cp_ftp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/cp_ftp.xml Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,28 @@
+
+
+Download large files via FTP
+cp_ftp.pl $__app__.config.ftp_upload_dir $__user_email__ $logfile
+#for $i in $files
+${i.file} ${i.name}
+#end for
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Use this tool to copy files to your FTP folder.
+
+Use an FTP client to connect to the Galaxy FTP server and use your usual Galaxy login.
+
+Please delete the files from your FTP folder after you have downloaded them.
+
+
diff -r 000000000000 -r 9974ff5df008 data_nfs/export.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/export.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,36 @@
#!/usr/bin/perl

# Export Galaxy datasets to an NFS path (staff use only).
# ARGS: <password> <symlink-flag> <dest-dir> <logfile> <src1> <name1> [...]

use strict;
use warnings;
use File::Copy;
use File::Basename;

# SETTINGS
my $password = 'jgi';    # changeme — NOTE(review): shared plaintext password; consider per-user auth

# ARGS
my ($trypass, $symlink, $dest, $logfile, @files) = @ARGV;
die("Invalid password\n") unless $trypass and $password and $trypass eq $password;
die("Absolute path required\n") unless defined($dest) and $dest =~ /^\//;
# also catch a trailing "/.." component, which the old /\/\.\.\// missed
die("Paths containing '..' are disallowed\n") if $dest =~ /(^|\/)\.\.(\/|$)/;
die("Only /home/*, /house/*, and /ifs/* paths are allowed\n")
    unless $dest =~ /^\/home/ or $dest =~ /^\/house/ or $dest =~ /^\/ifs/;
die("Destination folder does not exist: $dest\n") unless -e $dest;
die("Destination path is not a folder: $dest\n") unless -d $dest;

# CP
# @files is consumed by the shifts below, so keep a separate tally — the old
# code printed scalar(@files) afterwards and always reported 0 exported.
my $count = 0;
open(my $log, '>', $logfile) or die("Unable to open logfile, $logfile: $!\n");
while (@files) {
    my $file = shift @files or die("Source filename required\n");
    my $name = shift @files or die("Destination filename required\n");
    print $log "$file -> $dest/$name\n";
    if ($symlink) {
        symlink($file, "$dest/$name") or warn("Unable to symlink $file: $!\n");
    } else {
        copy($file, "$dest/$name") or warn("Unable to copy $file: $!\n");
    }
    ++$count;
}
close($log) or die("Unable to close logfile: $!\n");
print "Exported $count to $dest\n";
exit;
__END__
Copyright (c) 2011 US DOE Joint Genome Institute.
Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/export.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/export.xml Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,30 @@
+
+files to NFS path
+export.pl $password $symlink $dest $logfile
+#for $i in $files
+${i.file} ${i.name}
+#end for
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool allows staff to export files from Galaxy to a specified NFS path.
+
+The destination folder must be writable by the Galaxy user. It is suggested that you create a folder ~/dropbox and chmod 777 it.
+
+
diff -r 000000000000 -r 9974ff5df008 data_nfs/galaxy_import.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/galaxy_import.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,98 @@
#!/usr/bin/perl

## NB: THIS TOOL ASSUMES EMAIL ADDRESSES ARE GALAXY LOGINS
## MODIFICATIONS WILL BE REQUIRED IF YOU USE LDAP AUTHENTICATION

use warnings;
use strict;
use Cwd qw/abs_path/;
use Env qw/EMAIL/;
use Getopt::Long;

## SETTINGS
my $queue_file  = '/some/world/accessible/nfs/import_queue.txt';
my $admin_email = 'admin@yoursite.gov';

## HELP
my $usage=<<'ENDHERE';
NAME:
 galaxy_import.pl
PURPOSE:
 To queue files to copy to Galaxy user's FTP folder for subsequent import into Galaxy.
INPUT:
 @ARGV : one or more paths to copy (by default, skip folders; see -r)
 -r|recurse : recursively copy folders
 -e|email : your email is your Galaxy user ID; only required if $EMAIL is not defined
 NB: email is case sensitive, must be same as Galaxy login.
OUTPUT:
 stderr : error/warning messages
 -dev : copy to Galaxy/Dev instead of Galaxy/JGI (for developers only)
NOTES:
 - ONLY *WORLD* READABLE PATHS CAN BE COPIED
 - user's environment variable, $EMAIL, may be set to make the --email option unnecessary
 - the copied files must be preprocessed by the galaxy copy daemon, which runs every 2min
 - you must import the files into Galaxy within 1 month or they will be purged!
ENDHERE

## OPTIONS
my ($help, $email, $recurse, $dev);
GetOptions(
    'help'    => \$help,
    'email=s' => \$email,
    'recurse' => \$recurse,
    'dev'     => \$dev
);

## VALIDATE INPUT
if (!@ARGV or $help) {
    print $usage;
    exit;
}
unless ($email) {
    # fall back to the user's $EMAIL environment variable
    $email = $EMAIL
        or die("Either --email or environment variable \$EMAIL is required\n");
}
# Accept dotted and hyphenated domains (e.g. user@mail.site.gov); the old
# pattern (^\S+@\w+\.\w+$) wrongly rejected any domain with more than one dot.
die("This doesn't look like a valid email: $email\n") unless $email =~ /^\S+@[\w-]+(\.[\w-]+)+$/;
die("Queue file, $queue_file, does not exist; please notify Galaxy administrator at $admin_email\n") unless -f $queue_file;

# VALIDATE PATHS
# De-duplicate on the resolved absolute path; folders require -r.
my %paths = ();
foreach my $path (@ARGV) {
    $path = abs_path($path);
    if (-f $path) {
        if (exists($paths{$path})) {
            warn("Skipping duplicate file, $path\n");
        } else {
            $paths{$path} = 1;
        }
    } elsif (-d $path) {
        if ($recurse) {
            if (exists($paths{$path})) {
                warn("Skipping duplicate folder, $path\n");
            } else {
                $paths{$path} = 1;
            }
        } else {
            warn("Skipping folder, $path; use -r to process folders\n");
        }
    } else {
        warn("Invalid path, $path\n");
    }
}

# APPEND QUEUE FILE
# One tab-separated "email<TAB>path" record per line; gcpd.pl consumes these.
my @paths = keys %paths;
exit unless @paths;
open(my $queue, '>>', $queue_file) or die("Unable to open queue file, $queue_file: $!\n");
foreach my $path (@paths) {
    print $queue join("\t", $email, $path), "\n";
}
close($queue) or die("Unable to close queue file: $!\n");
exit;
__END__
Copyright(c) 2011 US DOE Joint Genome Institute.
Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/gcpd.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/gcpd.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,128 @@
#!/usr/bin/perl

# Galaxy copy daemon: drains the import queue written by galaxy_import.pl,
# copying each queued path into the owner's Galaxy FTP folder and emailing
# the owner when sendmail is available.
# SET UP A CRON JOB THAT RUNS FREQUENTLY (EG. EVERY 2 MIN)

use warnings;
use strict;
use File::Copy;
use File::Which;
use File::Basename;

## SETTINGS
my $queue_file  = '/some/world/accessible/nfs/import_queue.txt';
my $ftp_dir     = '/your/galaxy/ftp';
my $admin_email = 'admin@yoursite.gov';

# INPUT
die("No args expected\n") if @ARGV;
die("Queue file does not exist: $queue_file\n") unless -f $queue_file;
die("FTP folder does not exist: $ftp_dir\n") unless -d $ftp_dir;

# EXIT IF EMPTY
exit unless -s $queue_file;

# CHECK IF SENDMAIL AVAILABLE
our $sendmail = which('sendmail');    # undef if not found

# MOVE QUEUE, CREATE NEW EMPTY FILE
# Renaming the live queue to a pid-suffixed snapshot lets importers keep
# appending to a fresh queue file while this batch is processed.
my $current_queue_file = "$queue_file.$$";
move($queue_file, $current_queue_file) or die("Unable to move queue file: $!\n");
open(my $newq, '>', $queue_file) or die("Unable to create new queue file: $!\n");
close($newq) or die($!);
chmod 0666, $queue_file or die("Unable to chmod queue file: $!\n");

# LOAD QUEUE
# Each record is "email<TAB>path" (see galaxy_import.pl).
my @queue = ();
open(my $q, '<', $current_queue_file) or die("Unable to open queue snapshot: $!\n");
while (my $line = <$q>) {
    chomp $line;
    next unless length $line;    # tolerate stray blank lines
    my @row = split(/\t/, $line);
    push @queue, \@row;
}
close($q);
unlink($current_queue_file);

# COPY FILES
my %notify = ();    # email => [ paths copied ], only populated when we can mail
while (@queue) {
    my $row = shift @queue;
    my ($email, $path) = @$row;
    copypath($path, "$ftp_dir/$email");
    if ($sendmail) {
        push @{ $notify{$email} }, $path;    # autovivifies the list
    }
}

# SEND NOTIFICATION EMAILS
foreach my $email (keys %notify) {
    my $msg = "The following files are available in your FTP folder; use the Upload tool to import them.\n\n"
        . join("\n", @{ $notify{$email} }) . "\n";
    email($sendmail, $admin_email, 'Galaxy import complete', $email, $msg);
}
+
+## SUBROUTINES
+
# COPY FILE/FOLDER TO DEST FOLDER
# Copies $src into folder $destdir, creating $destdir (mode 0775) if needed.
# A folder $src is copied recursively under $destdir/<basename>; entries
# starting with '.' are skipped.  Problems are warned, not fatal, so one bad
# path cannot stall the rest of the queue.
sub copypath {
    my ($src, $destdir) = @_;
    return unless $src and $destdir;
    $src =~ s/\/+$//;        # strip trailing slash(es)
    $destdir =~ s/\/+$//;
    # make dest if not exist
    unless (-d $destdir) {
        mkdir($destdir) or warn("Unable to mkdir $destdir: $!\n");
        chmod 0775, $destdir or warn("Unable to chmod dir, $destdir: $!\n");
    }
    if (-d $src) {
        # recursively copy folder $src into $destdir/<basename>
        my ($subdir, $parentdir) = fileparse($src);
        $destdir .= "/$subdir";
        unless (-d $destdir) {
            mkdir($destdir) or warn("Unable to mkdir $destdir: $!\n");
            chmod 0775, $destdir or warn("Unable to chmod dir, $destdir: $!\n");
        }
        my $dh;
        unless (opendir($dh, $src)) {
            warn("Unable to open dir, $src: $!\n");
            return;
        }
        # an empty folder yields an empty list — that is fine, not an error
        # (the old "grep ... or warn" warned spuriously on empty folders)
        my @entries = grep { $_ !~ /^\./ } readdir($dh);
        closedir($dh);
        foreach my $entry (@entries) {
            copypath("$src/$entry", $destdir);
        }
    } elsif (-f $src) {
        my ($file, $dir) = fileparse($src);
        my $destfile = "$destdir/$file";
        # copy to the explicit destination file so the chmod below targets
        # exactly what was written
        copy($src, $destfile) or warn("Unable to copy $src: $!\n");
        chmod 0664, $destfile or warn("Unable to chmod file, $destfile: $!\n");
    } else {
        warn("Invalid path, $src\n");
    }
    return;
}
+
# Send a plain-text notification by piping an RFC-822 style message to
# sendmail.  Silently a no-op when $sendmail is undef (binary not found);
# dies if any of the remaining arguments is missing/empty.
sub email {
    my ($sendmail, $from, $subj, $to, $msg) = @_;
    return unless defined($sendmail);
    die("From email not defined\n") unless $from;
    die("Subj not defined\n") unless $subj;
    die("Recipient email not defined\n") unless $to;
    die("Message not defined\n") unless $msg;
    my $email =
        "Reply-to: $from\n" .
        "Subject: $subj\n" .
        "To: $to\n" .
        "Content-type: text/plain\n\n" .
        $msg;
    # list-form pipe open avoids the shell; -t = read recipients from headers
    open(my $mail, '|-', $sendmail, '-t') or die "Cannot open $sendmail: $!";
    print $mail $email;
    # a buffered write failure only surfaces at close, so check it
    close($mail) or die "Error closing pipe to $sendmail: $!";
    return;
}
+
+__END__
+Copyright(c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/link_path.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/link_path.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,39 @@
#!/usr/bin/perl

# THIS TOOL HAS BEEN DEPRECATED IN FAVOR OF THE galaxy_import.pl AND gcpd.pl METHOD
# WHICH DON'T REQUIRE SETTING METADATA MANUALLY FOR EACH FILE.
#
# ARGS: <password> <src-path> <dest-path> <symlink-flag>

use strict;
use warnings;
use File::Copy;

# CONFIG
my $password = 'jgi';    # CHANGEME — NOTE(review): shared plaintext password
my @allowed_paths = ('/home/');

# ARGS
my ($trypass, $src, $dest, $symlink) = @ARGV;
die("Invalid password\n") unless $trypass and $password and $trypass eq $password;
die("Absolute path required\n") unless defined($src) and $src =~ /^\//;
# also catch a trailing "/.." component, which /\/\.\.\// missed
die("Paths containing '..' are disallowed\n") if $src =~ /(^|\/)\.\.(\/|$)/;
# whitelist check: \Q...\E quotes ALL regex metacharacters in the prefix
# (the old code only escaped slashes, so other metachars leaked into the regex)
my $ok = 0;
foreach my $dir (@allowed_paths) {
    if ($src =~ /^\Q$dir\E/) {
        $ok = 1;
        last;
    }
}
die("Not an allowed source path\n") unless $ok;

# CP
unlink($dest);    # best-effort: $dest may legitimately not exist yet
if ($symlink) {
    symlink($src, $dest) or die("Unable to symlink $src: $!\n");
} else {
    copy($src, $dest) or die("Unable to copy $src: $!\n");
}
exit;
__END__
Copyright (c) 2011 US DOE Joint Genome Institute.
Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/link_path.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/link_path.xml Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,22 @@
+
+Import file via NFS path
+link_path.pl $password $source $dest $symlink
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+This tool allows staff to import a file into Galaxy from NFS path.
+
+You will have to edit the imported file's metadata to assign it to the appropriate datatype.
+
+