# HG changeset patch
# User edward-kirton
# Date 1322790096 18000
# Node ID 9974ff5df008526c4fcbc1452c1be9a9aae80e0e
Uploaded

diff -r 000000000000 -r 9974ff5df008 data_nfs/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/README Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,14 @@
+There are four tools contained herein, but only two are required.
+
+The recommended way to import data into Galaxy is by using galaxy_import.pl from the command line;
+the galaxy user must also run the gcpd.pl script periodically (e.g. every 2 min via cron).
+
+The older copy/symlink import tool is included but not recommended, as it requires the user
+to manually edit the metadata for each file imported.
+
+The recommended way to export data is, again, the FTP method (the cp_ftp tool).
+
+The older tool for exporting to an NFS path is included if you prefer it.
+
+Author/Support: ESKirton@LBL.gov
+License: Copyright(c) 2011 US DOE Joint Genome Institute; use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/cp_ftp.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/cp_ftp.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,40 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use File::Copy;
+use File::Basename;
+
+# SETTINGS
+my $autocreate=1;
+
+# VALIDATE INPUT
+my ($ftp_dir, $user, $logfile, @files)=@ARGV;
+die("This Galaxy instance does not have FTP enabled\n") unless $ftp_dir; # i.e. ftp_upload_dir in universe_wsgi.ini
+die("FTP dir, $ftp_dir, does not exist!\n") unless -d $ftp_dir;
+die("Invalid email address: $user\n") if $user =~ /^[\.\/]/;
+my $dest="$ftp_dir/$user";
+unless (-d $dest) {
+    if ($autocreate) {
+        mkdir($dest) or die("Unable to mkdir, $dest: $!\n");
+        chmod 0775, $dest or die("Unable to chmod $dest: $!\n");
+    } else {
+        die("User $user does not have an FTP folder\n");
+    }
+}
+
+# COPY FILES, WRITE TO LOG
+open(OUT, ">$logfile") or die($!);
+while (@files) {
+    my $file=shift @files or die("Source filename required\n");
+    my $name=shift @files or die("Destination filename required\n");
+    die("Source file, $file, does not exist\n") unless -e $file;
+    copy($file, "$dest/$name") or die($!);
+    print OUT "Copied $name\n";
+}
+close OUT;
+print "Please delete the files from your FTP folder after downloading.\n";
+exit;
+__END__
+Copyright (c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/cp_ftp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/cp_ftp.xml Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,28 @@
+<tool id="cp_ftp" name="cp_ftp">
+  <description>Download large files via FTP</description>
+  <command interpreter="perl">cp_ftp.pl $__app__.config.ftp_upload_dir $__user_email__ $logfile
+#for $i in $files
+${i.file} ${i.name}
+#end for
+  </command>
+  <inputs>
+    <repeat name="files" title="Files">
+      <param name="file" type="data" label="File to export"/>
+      <param name="name" type="text" label="Destination filename"/>
+    </repeat>
+  </inputs>
+  <outputs>
+    <data name="logfile" format="txt" label="Export log"/>
+  </outputs>
+  <help>
+
+**What it does**
+
+Use this tool to copy files to your FTP folder.
+
+Use an FTP client to connect to the Galaxy FTP server and use your usual Galaxy login.
+
+Please delete the files from your FTP folder after you have downloaded them.
+
+  </help>
+</tool>
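For reference: cp_ftp.pl receives the FTP root directory from Galaxy via $__app__.config.ftp_upload_dir, so FTP upload must be enabled in the Galaxy server config. A minimal sketch of the relevant universe_wsgi.ini settings (values are placeholders, not part of this changeset):

  # directory and hostname used for FTP import/export (placeholders)
  ftp_upload_dir = /your/galaxy/ftp
  ftp_upload_site = ftp.yoursite.gov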
diff -r 000000000000 -r 9974ff5df008 data_nfs/export.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/export.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,38 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use File::Copy;
+
+# SETTINGS
+my $password='jgi'; # changeme
+
+# ARGS
+my ($trypass, $symlink, $dest, $logfile, @files)=@ARGV;
+die("Invalid password\n") unless $trypass and $password and $trypass eq $password;
+die("Absolute path required\n") unless $dest =~ /^\//;
+die("Paths containing '..' are disallowed\n") if $dest =~ /\/\.\.\//;
+die("Only /home/*, /house/*, and /ifs/* paths are allowed\n") unless $dest =~ /^\/home/ or $dest =~ /^\/house/ or $dest =~ /^\/ifs/;
+die("Destination folder does not exist: $dest\n") unless -e $dest;
+die("Destination path is not a folder: $dest\n") unless -d $dest;
+
+# COPY/SYMLINK FILES, WRITE TO LOG
+my $count=0;
+open(OUT, ">$logfile") or die($!);
+while (@files) {
+    my $file=shift @files or die("Source filename required\n");
+    my $name=shift @files or die("Destination filename required\n");
+    print OUT "$file -> $dest/$name\n";
+    if ($symlink) {
+        symlink($file, "$dest/$name") or die("Unable to symlink $file: $!\n");
+    } else {
+        copy($file, "$dest/$name") or die("Unable to copy $file: $!\n");
+    }
+    $count++;
+}
+close OUT;
+print "Exported $count files to $dest\n";
+exit;
+__END__
+Copyright (c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/export.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/export.xml Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,29 @@
+<tool id="export_nfs" name="Export">
+  <description>files to NFS path</description>
+  <command interpreter="perl">export.pl $password $symlink $dest $logfile
+#for $i in $files
+${i.file} ${i.name}
+#end for
+  </command>
+  <inputs>
+    <param name="password" type="text" label="Password"/>
+    <param name="symlink" type="boolean" truevalue="1" falsevalue="0" label="Symlink instead of copy"/>
+    <param name="dest" type="text" label="Destination folder (absolute NFS path)"/>
+    <repeat name="files" title="Files">
+      <param name="file" type="data" label="File to export"/>
+      <param name="name" type="text" label="Destination filename"/>
+    </repeat>
+  </inputs>
+  <outputs>
+    <data name="logfile" format="txt" label="Export log"/>
+  </outputs>
+  <help>
+
+**What it does**
+
+This tool allows staff to export files from Galaxy to a specified NFS path.
+
+The destination folder must be writable by the Galaxy user. It is suggested that you create a folder, e.g. ~/dropbox, and chmod 777 it.
+
+  </help>
+</tool>
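For reference, export.xml invokes the script with the password, the symlink flag, the destination folder, and the log file, followed by one file/name pair per selected dataset. A hypothetical invocation (all paths invented for illustration):

  perl export.pl jgi 0 /house/mygroup/dropbox export.log /galaxy/database/files/000/dataset_1.dat reads.fastq

This copies dataset_1.dat to /house/mygroup/dropbox/reads.fastq and writes one log line per file to export.log.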
diff -r 000000000000 -r 9974ff5df008 data_nfs/galaxy_import.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/galaxy_import.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,92 @@
+#!/usr/bin/perl
+
+## NB: THIS TOOL ASSUMES EMAIL ADDRESSES ARE GALAXY LOGINS
+## MODIFICATIONS WILL BE REQUIRED IF YOU USE LDAP AUTHENTICATION
+
+use warnings;
+use strict;
+use Cwd qw/abs_path/;
+use Env qw/EMAIL/;
+use Getopt::Long;
+
+## SETTINGS
+my $queue_file = '/some/world/accessible/nfs/import_queue.txt';
+my $admin_email = 'admin@yoursite.gov';
+
+## HELP
+my $usage=<<'ENDHERE';
+NAME:
+    galaxy_import.pl
+PURPOSE:
+    To queue files for copying to a Galaxy user's FTP folder, for subsequent import into Galaxy.
+INPUT:
+    @ARGV : one or more paths to copy (by default, folders are skipped; see -r)
+    -r|recurse : recursively copy folders
+    -e|email : your email is your Galaxy user ID; only required if $EMAIL is not defined.
+               NB: email is case sensitive and must match your Galaxy login.
+    -dev : copy to Galaxy/Dev instead of Galaxy/JGI (for developers only)
+OUTPUT:
+    stderr : error/warning messages
+NOTES:
+    - ONLY *WORLD* READABLE PATHS CAN BE COPIED
+    - the environment variable $EMAIL may be set to make the --email option unnecessary
+    - queued files are copied by the Galaxy copy daemon (gcpd.pl), which runs every 2 min
+    - you must import the files into Galaxy within 1 month or they will be purged!
+ENDHERE
+
+## OPTIONS
+my ($help,$email,$recurse,$dev);
+GetOptions(
+    'help'    => \$help,
+    'email=s' => \$email,
+    'recurse' => \$recurse,
+    'dev'     => \$dev
+);
+
+## VALIDATE INPUT
+if (!@ARGV or $help) {
+    print $usage;
+    exit;
+}
+$email = $EMAIL if !$email and $EMAIL;
+die("Either --email or environment variable \$EMAIL is required\n") unless $email;
+die("This doesn't look like a valid email: $email\n") unless $email =~ /^\S+@[\w.-]+\.\w+$/;
+die("Queue file, $queue_file, does not exist; please notify the Galaxy administrator at $admin_email\n") unless -f $queue_file;
+
+# VALIDATE PATHS
+my %paths=();
+foreach my $path (@ARGV) {
+    $path = abs_path($path);
+    if (-f $path) {
+        if (exists($paths{$path})) {
+            warn("Skipping duplicate file, $path\n");
+        } else {
+            $paths{$path}=1;
+        }
+    } elsif (-d $path) {
+        if ($recurse) {
+            if (exists($paths{$path})) {
+                warn("Skipping duplicate folder, $path\n");
+            } else {
+                $paths{$path}=1;
+            }
+        } else {
+            warn("Skipping folder, $path; use -r to process folders\n");
+        }
+    } else {
+        warn("Invalid path, $path\n");
+    }
+}
+
+# APPEND QUEUE FILE
+my @paths=keys %paths;
+exit unless @paths;
+open(QUEUE, ">>$queue_file") or die($!);
+foreach my $path (@paths) {
+    print QUEUE join("\t", $email, $path),"\n";
+}
+close QUEUE;
+exit;
+__END__
+Copyright(c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
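For reference, galaxy_import.pl appends one tab-separated (email, path) record per validated path to the shared queue file. A hypothetical session (address and path invented for illustration):

  export EMAIL=user@yoursite.gov
  galaxy_import.pl -r /house/projectA/reads

would append a record of the form (fields tab-separated):

  user@yoursite.gov	/house/projectA/reads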
join("\n", @{$notify{$email}})."\n"; + email($sendmail,$admin_email,'Galaxy import complete',$email,$msg); +} + +## SUBROUTINES + +# COPY FILE/FOLDER TO DEST FOLDER +sub copypath { + my ($src,$destdir)=@_; + return unless $src and $destdir; + $src = $1 if $src =~ /^(.+)\/$/; + $destdir = $1 if $destdir =~ /^(.+)\/$/; + # make dest if not exist + unless (-d $destdir) { + mkdir($destdir) or warn("Unable to mkdir $destdir: $!\n"); + chmod 0775, $destdir or warn("Unable to chmod dir, $destdir: $!\n"); + } + if (-d $src) { + #print "Recursively copy folder $src to $destdir\n"; + my ($subdir,$parentdir)=fileparse($src); + $destdir .= "/$subdir"; + # make dest if not exist + unless (-d $destdir) { + mkdir($destdir) or warn("Unable to mkdir $destdir: $!\n"); + chmod 0775, $destdir or warn("Unable to chmod dir, $destdir: $!\n"); + } + # process src folder + unless (opendir(DIR, $src)) { + warn("Unable to open dir, $src\n"); + return; + } + my @files= grep { $_ !~ /^\./ } readdir DIR or warn("Unable to readdir, $src: $!\n"); + closedir(DIR); + #print "Folder, $src, contains ", scalar(@files), " files\n"; + foreach my $file (@files) { + copypath("$src/$file",$destdir); + } + } elsif (-f $src) { + my ($file,$dir)=fileparse($src); + my $destfile="$destdir/$file"; + #print "Copy file, $src to $destfile\n"; + copy($src,$destdir) or warn("Unable to copy $src: $!\n"); + chmod 0664, $destfile or warn("Unable to chmod file, $destfile: $!\n"); + } else { + warn("Invalid path, $src\n"); + } +} + +sub email { + my ($sendmail, $from, $subj, $to, $msg)=@_; + return unless defined($sendmail); + die("From email not defined\n") unless $from; + die("Subj not defined\n") unless $subj; + die("Receipient email not defined\n") unless $to; + die("Message not defined\n") unless $msg; + my $email= + "Reply-to: $from\n". + "Subject: $subj\n". + "To: $to\n". + "Content-type: text/plain\n\n". + $msg; + open(SENDMAIL, "|$sendmail -t") or die "Cannot open $sendmail: $!"; + print SENDMAIL $email; + close(SENDMAIL); +} + +__END__ +Copyright(c) 2011 US DOE Joint Genome Institute. +Use freely under the same license as Galaxy itself. diff -r 000000000000 -r 9974ff5df008 data_nfs/link_path.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_nfs/link_path.pl Thu Dec 01 20:41:36 2011 -0500 @@ -0,0 +1,39 @@ +#!/usr/bin/perl + +# THIS TOOL HAS BEEN DEPRECATED IN FAVOR OF THE galaxy_import.pl AND gcpd.pl METHOD +# WHICH DON'T REQUIRE SETTING METADATA MANUALLY FOR EACH FILE. + +use strict; +use File::Copy; + +# CONFIG +my $password='jgi'; # CHANGEME +my @allowed_paths = ('/home/'); + +# ARGS +my ($trypass, $src, $dest, $symlink)=@ARGV; +die("Invalid password\n") unless $trypass and $password and $trypass eq $password; +die("Absolute path required\n") unless $src =~ /^\//; +die("Paths containing '..' are disallowed\n") if $src =~ /\/\.\.\//; +my $ok=0; +foreach my $dir (@allowed_paths) { + my $re="^$dir"; + $re =~ s/\//\\\//g; + if ($src =~ /$re/) { + $ok=1; + last; + } +} +die("Not an allowed source path\n") unless $ok; + +# CP +unlink($dest); +if ($symlink) { + symlink($src, $dest); +} else { + copy($src,$dest); +} +exit; +__END__ +Copyright (c) 2011 US DOE Joint Genome Institute. +Use freely under the same license as Galaxy itself. 
diff -r 000000000000 -r 9974ff5df008 data_nfs/link_path.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/link_path.pl Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+# THIS TOOL HAS BEEN DEPRECATED IN FAVOR OF THE galaxy_import.pl AND gcpd.pl METHOD,
+# WHICH DOESN'T REQUIRE SETTING METADATA MANUALLY FOR EACH FILE.
+
+use strict;
+use warnings;
+use File::Copy;
+
+# CONFIG
+my $password='jgi'; # CHANGEME
+my @allowed_paths = ('/home/');
+
+# ARGS
+my ($trypass, $src, $dest, $symlink)=@ARGV;
+die("Invalid password\n") unless $trypass and $password and $trypass eq $password;
+die("Absolute path required\n") unless $src =~ /^\//;
+die("Paths containing '..' are disallowed\n") if $src =~ /\/\.\.\//;
+my $ok=0;
+foreach my $dir (@allowed_paths) {
+    my $re = '^'.quotemeta($dir);
+    if ($src =~ /$re/) {
+        $ok=1;
+        last;
+    }
+}
+die("Not an allowed source path\n") unless $ok;
+
+# COPY OR SYMLINK
+unlink($dest);
+if ($symlink) {
+    symlink($src, $dest) or die("Unable to symlink: $!\n");
+} else {
+    copy($src,$dest) or die("Unable to copy: $!\n");
+}
+exit;
+__END__
+Copyright (c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
diff -r 000000000000 -r 9974ff5df008 data_nfs/link_path.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/link_path.xml Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,21 @@
+<tool id="link_path" name="Import">
+  <description>Import file via NFS path</description>
+  <command interpreter="perl">link_path.pl $password $source $dest $symlink</command>
+  <inputs>
+    <param name="password" type="text" label="Password"/>
+    <param name="source" type="text" label="Source file (absolute NFS path)"/>
+    <param name="symlink" type="boolean" truevalue="1" falsevalue="0" label="Symlink instead of copy"/>
+  </inputs>
+  <outputs>
+    <data name="dest" format="data" label="Imported file"/>
+  </outputs>
+  <help>
+
+**What it does**
+
+This tool allows staff to import a file into Galaxy from an NFS path.
+
+You will have to edit the imported file's metadata to assign the appropriate datatype.
+
+  </help>
+</tool>
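For reference, link_path.xml invokes the script with the password, the source path, the Galaxy-assigned destination dataset path, and the symlink flag. A hypothetical invocation (paths invented for illustration):

  perl link_path.pl jgi /home/user/data.fastq /galaxy/database/files/000/dataset_42.dat 1

As the help text notes, the imported dataset's datatype must then be set manually.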