changeset 0:9974ff5df008 default tip

Uploaded
author edward-kirton
date Thu, 01 Dec 2011 20:41:36 -0500
parents
children
files data_nfs/README data_nfs/cp_ftp.pl data_nfs/cp_ftp.xml data_nfs/export.pl data_nfs/export.xml data_nfs/galaxy_import.pl data_nfs/gcpd.pl data_nfs/link_path.pl data_nfs/link_path.xml
diffstat 9 files changed, 435 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/README	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,14 @@
+There are four tools contained herein, but only two are required.
+
+The recommended way to import data into galaxy is by using galaxy_import.pl from the command-line;
+the galaxy user must also run the gcpd.pl script periodically (e.g. every 2 min via cron).
+
+The older tool for copying/symlinking to import is included but not recommended as it requires the user
+to manually edit the metadata for each file imported.
+
+The recommended way to export data is to, again, use the FTP method (cp_ftp tool).
+
+The older tool to export to NFS path is included if you prefer it.
+
+Author/Support: ESKirton@LBL.gov
+License: Copyright(c) 2011 US DOE Joint Genome Institute; use freely under the same license as Galaxy itself.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/cp_ftp.pl	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,47 @@
+#!/usr/bin/perl
+
+# Copy Galaxy datasets into the user's FTP folder so they may be
+# downloaded with an ordinary FTP client.
+# Usage: cp_ftp.pl <ftp_dir> <user_email> <logfile> [<src> <destname>]...
+
+use strict;
+use warnings;
+use File::Copy;
+use File::Basename;
+
+# SETTINGS
+my $autocreate=1; # create the per-user FTP folder on demand
+
+# VALIDATE INPUT
+my ($ftp_dir, $user, $logfile, @files)=@ARGV;
+die("This galaxy instance does not have FTP enabled\n") unless $ftp_dir; # i.e. in universe_wsgi.ini
+die("FTP dir, $ftp_dir, does not exist!\n") unless -d $ftp_dir;
+# $user becomes a path component; reject anything that could escape $ftp_dir
+die("Invalid email address: $user\n") if !defined($user) or $user =~ /^[\.\/]/ or $user =~ /\// or $user =~ /\.\./;
+die("Logfile argument required\n") unless $logfile;
+my $dest="$ftp_dir/$user";
+unless (-d $dest) {
+    if ($autocreate) {
+        mkdir($dest) or die("Unable to mkdir, $dest: $!\n");
+        chmod 0775, $dest or die("Unable to chmod $dest: $!\n");
+    } else {
+        die("User $user does not have an FTP folder\n");
+    }
+}
+
+# COPY FILES, WRITE TO LOG
+# @files is a flat list of (source, destination-name) pairs.
+open(my $out, '>', $logfile) or die("Unable to open logfile, $logfile: $!\n");
+while (@files) {
+    my $file=shift @files or die("Source filename required\n");
+    my $name=shift @files or die("Destination filename required\n");
+    die("Source file, $file, does not exist\n") unless -e $file;
+    copy($file, "$dest/$name") or die("Unable to copy $file: $!\n");
+    print $out "Copied $name\n";
+}
+close($out) or die("Unable to close logfile: $!\n");
+print "Please delete the files from your FTP folder after downloading.\n";
+exit;
+__END__
+Copyright (c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/cp_ftp.xml	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,28 @@
+<!-- THIS TOOL ASSUMES EMAIL ADDRESSES ARE USED AS USER IDS.
+IF YOU ARE USING LDAP AUTHENTICATION, REPLACE THE $__user_email__ BELOW -->
+<tool id="cp_ftp" name="Copy to FTP Folder" version="1.0.0">
+<description>Download large files via FTP</description>
+<command interpreter="perl">cp_ftp.pl $__app__.config.ftp_upload_dir $__user_email__ $logfile
+#for $i in $files
+${i.file} ${i.name}
+#end for
+</command>
+<inputs>
+	<repeat name="files" title="Files to export">
+		<param name="file" type="data" format="data" label="File"/>
+        <param name="name" type="text" size='20' value="" label="Filename" help='Include filetype suffix e.g. ".txt"' />
+	</repeat>
+</inputs>
+<outputs>
+    <data name="logfile" format="txt" />
+</outputs>
+<help>
+**What it does**
+
+Use this tool to copy files to your FTP folder.
+
+Use an FTP client to connect to the Galaxy FTP server and use your usual Galaxy login.
+
+Please delete the files from your FTP folder after you have downloaded them.
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/export.pl	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,44 @@
+#!/usr/bin/perl
+# Export Galaxy datasets to an NFS path by copying or symlinking them.
+# Usage: export.pl <password> <symlink?> <dest_dir> <logfile> [<src> <destname>]...
+
+use strict;
+use warnings;
+use File::Copy;
+use File::Basename;
+
+# SETTINGS
+my $password='jgi'; # changeme
+
+# ARGS
+my ($trypass, $symlink, $dest, $logfile, @files)=@ARGV;
+die("Invalid password\n") unless $trypass and $password and $trypass eq $password;
+die("Destination folder required\n") unless defined $dest;
+die("Absolute path required\n") unless $dest =~ /^\//;
+die("Paths containing '..' are disallowed\n") if $dest =~ /\/\.\.\//;
+die("Only /home/*, /house/*, and /ifs/* paths are allowed\n") unless $dest =~ /^\/home/ or $dest =~ /^\/house/ or $dest =~ /^\/ifs/;
+die("Destination folder does not exist: $dest\n") unless -e $dest;
+die("Destination path is not a folder: $dest\n") unless -d $dest;
+
+# CP
+# @files is a flat list of (source, destination-name) pairs.
+my $exported=0;
+open(my $log, '>', $logfile) or die("Unable to open logfile, $logfile: $!\n");
+while (@files) {
+    my $file=shift @files or die("Source filename required\n");
+    my $name=shift @files or die("Destination filename required\n");
+    print $log "$file -> $dest/$name\n";
+    if ($symlink) {
+        symlink($file, "$dest/$name") or die("Unable to symlink $file: $!\n");
+    } else {
+        copy($file, "$dest/$name") or die("Unable to copy $file: $!\n");
+    }
+    ++$exported;
+}
+close($log) or die($!);
+# NB: report the number of pairs processed; @files itself is empty by now
+print "Exported $exported to $dest\n";
+exit;
+__END__
+Copyright (c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/export.xml	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,30 @@
+<tool id="export" name="Export" version="1.0.0">
+<description>files to NFS path</description>
+<command interpreter="perl">export.pl $password $symlink $dest $logfile
+#for $i in $files
+${i.file} ${i.name}
+#end for
+</command>
+<inputs>
+    <param name="password" type="text" size='20' value='' label="Export password" help='Not your user password' />
+    <param name="symlink" type="select" display="radio" label="Copy or symlink files?">
+        <option value="0">COPY the files so I may delete the files from Galaxy</option>
+        <option value="1">SYMLINK the files; I will not delete the files from Galaxy</option>
+    </param>
+    <param name="dest" type="text" size='120' value="/house/homedirs/" label="Destination folder" help="Must be writable by Galaxy user (chmod 777)" />
+	<repeat name="files" title="Files to export">
+		<param name="file" type="data" format="data" label="File"/>
+        <param name="name" type="text" size='20' value="" label="Name" />
+	</repeat>
+</inputs>
+<outputs>
+    <data name="logfile" format="txt" />
+</outputs>
+<help>
+**What it does**
+
+This tool allows staff to export files from Galaxy to a specified NFS path.
+
+The destination folder must be writable by the Galaxy user. It is suggested that you create a folder ~/dropbox and chmod 777 it.
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/galaxy_import.pl	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,102 @@
+#!/usr/bin/perl
+
+## NB: THIS TOOL ASSUMES EMAIL ADDRESSES ARE GALAXY LOGINS
+##     MODIFICATIONS WILL BE REQUIRED IF YOU USE LDAP AUTHENTICATION
+
+use warnings;
+use strict;
+use Cwd qw/abs_path/;
+use Env qw/EMAIL/;
+use Fcntl qw/:flock/;
+use Getopt::Long;
+
+## SETTINGS
+my $queue_file = '/some/world/accessible/nfs/import_queue.txt';
+my $admin_email = 'admin@yoursite.gov';
+
+## HELP
+my $usage=<<'ENDHERE';
+NAME:
+    galaxy_import.pl
+PURPOSE:
+    To queue files to copy to Galaxy user's FTP folder for subsequent import into Galaxy.
+INPUT:
+    @ARGV : one or more paths to copy (by default, skip folders; see -r)
+    -r|recurse : recursively copy folders
+    -e|email <email> : your email is your Galaxy user ID; only required if $EMAIL is not defined
+        NB: email is case sensitive, must be same as Galaxy login.
+OUTPUT:
+    stderr : error/warning messages
+    -dev : copy to Galaxy/Dev instead of Galaxy/JGI (for developers only)
+NOTES:
+    - ONLY *WORLD* READABLE PATHS CAN BE COPIED
+    - user's environment variable, $EMAIL, may be set to make the --email option unnecessary
+    - the copied files must be preprocessed by the galaxy copy daemon, which runs every 2min
+    - you must import the files into Galaxy within 1 month or they will be purged!
+ENDHERE
+
+## OPTIONS
+my ($help,$email,$recurse,$dev);
+GetOptions(
+    'help' => \$help,
+    'email=s' => \$email,
+    'recurse' => \$recurse,
+    'dev' => \$dev
+);
+
+## VALIDATE INPUT
+if (!@ARGV or $help) {
+    print $usage;
+    exit;
+}
+# fall back to the $EMAIL environment variable when --email was not given
+$email = $EMAIL if !$email and $EMAIL;
+die("Either --email or environment variable \$EMAIL is required\n") unless $email;
+# allow multi-label domains, e.g. user@mail.lbl.gov
+die("This doesn't look like a valid email: $email\n") unless $email =~ /^\S+@\w+(\.\w+)+$/;
+die("Queue file, $queue_file, does not exist; please notify Galaxy administrator at $admin_email\n") unless -f $queue_file;
+
+# VALIDATE PATHS
+my %paths=();
+foreach my $path (@ARGV) {
+    my $abs = abs_path($path); # undef if the path cannot be resolved
+    unless (defined $abs) {
+        warn("Invalid path, $path\n");
+        next;
+    }
+    $path = $abs;
+    if (-f $path) {
+        if (exists($paths{$path})) {
+            warn("Skipping duplicate file, $path\n");
+        } else {
+            $paths{$path}=1;
+        }
+    } elsif (-d $path) {
+        if ($recurse) {
+            if (exists($paths{$path})) {
+                warn("Skipping duplicate folder, $path\n");
+            } else {
+                $paths{$path}=1;
+            }
+        } else {
+            warn("Skipping folder, $path; use -r to process folders\n");
+        }
+    } else {
+        warn("Invalid path, $path\n");
+    }
+}
+
+# APPEND QUEUE FILE
+my @paths=keys %paths;
+exit unless @paths;
+# lock the queue so concurrent users cannot interleave partial lines
+open(my $queue, '>>', $queue_file) or die($!);
+flock($queue, LOCK_EX) or die("Unable to lock queue file: $!\n");
+foreach my $path (@paths) {
+    print $queue join("\t", $email, $path),"\n";
+}
+close($queue) or die($!);
+exit;
+__END__
+Copyright(c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/gcpd.pl	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,134 @@
+#!/usr/bin/perl
+
+# Galaxy copy daemon: drains the shared import queue and copies the
+# queued paths into each user's Galaxy FTP folder, then emails the user.
+# SET UP A CRON JOB THAT RUNS FREQUENTLY (EG. EVERY 2 MIN)
+
+use warnings;
+use strict;
+use File::Copy;
+use File::Which;
+use File::Basename;
+
+## SETTINGS
+my $queue_file = '/some/world/accessible/nfs/import_queue.txt';
+my $ftp_dir = '/your/galaxy/ftp';
+my $admin_email = 'admin@yoursite.gov';
+
+# INPUT
+die("No args expected\n") if @ARGV;
+die("Queue file does not exist: $queue_file\n") unless -f $queue_file;
+die("FTP folder does not exist: $ftp_dir\n") unless -d $ftp_dir;
+
+# EXIT IF EMPTY
+exit unless -s $queue_file;
+
+# CHECK IF SENDMAIL AVAILABLE
+our $sendmail=which('sendmail'); # undef if not found
+
+# MOVE QUEUE, CREATE NEW EMPTY FILE
+# renaming the queue first lets users keep appending while we work
+my $current_queue_file="$queue_file.$$";
+move($queue_file,$current_queue_file) or die("Unable to move queue file: $!\n");
+open(my $q, '>', $queue_file) or die($!);
+close($q) or die($!);
+chmod 0666, $queue_file or die("Unable to chmod queue file: $!\n");
+
+# LOAD QUEUE
+# each queue line is "<email>\t<path>"
+my @queue=();
+open($q, '<', $current_queue_file) or die($!);
+while (<$q>) {
+    chomp;
+    my @row=split(/\t/);
+    push @queue, \@row;
+}
+close($q);
+unlink($current_queue_file);
+
+# COPY FILES
+my %notify=();
+while (@queue) {
+    my $row=shift @queue;
+    my ($email,$path)=@$row;
+    copypath($path,"$ftp_dir/$email");
+    if ($sendmail) {
+        $notify{$email}=[] unless exists($notify{$email});
+        push @{$notify{$email}}, $path;
+    }
+}
+
+# SEND NOTIFICATION EMAILS
+foreach my $email (keys %notify) {
+    my $msg="The following files are available in your FTP folder; use the Upload tool to import them.\n\n"
+        . join("\n", @{$notify{$email}})."\n";
+    email($sendmail,$admin_email,'Galaxy import complete',$email,$msg);
+}
+
+## SUBROUTINES
+
+# COPY FILE/FOLDER TO DEST FOLDER
+# recursively copies $src into $destdir; folders are recreated, dotfiles skipped
+sub copypath {
+    my ($src,$destdir)=@_;
+    return unless $src and $destdir;
+    # strip trailing slashes so fileparse() yields the expected basename
+    $src = $1 if $src =~ /^(.+)\/$/;
+    $destdir = $1 if $destdir =~ /^(.+)\/$/;
+    # make dest if not exist
+    unless (-d $destdir) {
+        mkdir($destdir) or warn("Unable to mkdir $destdir: $!\n");
+        chmod 0775, $destdir or warn("Unable to chmod dir, $destdir: $!\n");
+    }
+    if (-d $src) {
+        # recreate the source folder under $destdir, then recurse into it
+        my ($subdir,$parentdir)=fileparse($src);
+        $destdir .= "/$subdir";
+        unless (-d $destdir) {
+            mkdir($destdir) or warn("Unable to mkdir $destdir: $!\n");
+            chmod 0775, $destdir or warn("Unable to chmod dir, $destdir: $!\n");
+        }
+        my $dh;
+        unless (opendir($dh, $src)) {
+            warn("Unable to open dir, $src\n");
+            return;
+        }
+        # skip dotfiles (including . and ..); an empty folder is not an error
+        my @files = grep { $_ !~ /^\./ } readdir($dh);
+        closedir($dh);
+        foreach my $file (@files) {
+            copypath("$src/$file",$destdir);
+        }
+    } elsif (-f $src) {
+        my ($file,$dir)=fileparse($src);
+        my $destfile="$destdir/$file";
+        copy($src,$destfile) or warn("Unable to copy $src: $!\n");
+        chmod 0664, $destfile or warn("Unable to chmod file, $destfile: $!\n");
+    } else {
+        warn("Invalid path, $src\n");
+    }
+}
+
+# send a plain-text email via the local sendmail binary (no-op if absent)
+sub email {
+    my ($sendmail, $from, $subj, $to, $msg)=@_;
+    return unless defined($sendmail);
+    die("From email not defined\n") unless $from;
+    die("Subj not defined\n") unless $subj;
+    die("Recipient email not defined\n") unless $to;
+    die("Message not defined\n") unless $msg;
+    my $email=
+        "Reply-to: $from\n".
+        "Subject: $subj\n".
+        "To: $to\n".
+        "Content-type: text/plain\n\n".
+        $msg;
+    # list-form pipe open avoids shell interpretation of the command
+    open(my $mail, '|-', $sendmail, '-t') or die "Cannot open $sendmail: $!";
+    print $mail $email;
+    close($mail) or warn("sendmail exited abnormally\n");
+}
+
+__END__
+Copyright(c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/link_path.pl	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+
+# THIS TOOL HAS BEEN DEPRECATED IN FAVOR OF THE galaxy_import.pl AND gcpd.pl METHOD
+# WHICH DON'T REQUIRE SETTING METADATA MANUALLY FOR EACH FILE.
+
+use strict;
+use warnings;
+use File::Copy;
+
+# CONFIG
+my $password='jgi'; # CHANGEME
+my @allowed_paths = ('/home/');
+
+# ARGS: <password> <source path> <dest path> <symlink?>
+my ($trypass, $src, $dest, $symlink)=@ARGV;
+die("Invalid password\n") unless $trypass and $password and $trypass eq $password;
+die("Source path required\n") unless $src;
+die("Absolute path required\n") unless $src =~ /^\//;
+die("Paths containing '..' are disallowed\n") if $src =~ /\/\.\.\//;
+# only paths under one of @allowed_paths may be imported
+my $ok=0;
+foreach my $dir (@allowed_paths) {
+    if (index($src, $dir) == 0) {
+        $ok=1;
+        last;
+    }
+}
+die("Not an allowed source path\n") unless $ok;
+
+# CP
+# replace any stale output file, then copy or symlink the source into place
+unlink($dest);
+if ($symlink) {
+    symlink($src, $dest) or die("Unable to symlink $src: $!\n");
+} else {
+    copy($src,$dest) or die("Unable to copy $src: $!\n");
+}
+exit;
+__END__
+Copyright (c) 2011 US DOE Joint Genome Institute.
+Use freely under the same license as Galaxy itself.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_nfs/link_path.xml	Thu Dec 01 20:41:36 2011 -0500
@@ -0,0 +1,22 @@
+<tool id="link_path" name="Link Path" version="1.0.0">
+<description>Import file via NFS path</description>
+<command interpreter="perl">link_path.pl $password $source $dest $symlink</command>
+<inputs>
+    <param name="password" type="text" size='20' value='' label="Link password" help='Not your user password' />
+    <param name="source" type="text" size='100' value='' label="Complete pathname" />
+    <param name="symlink" type="select" display="radio" label="Is that a permanent location?">
+        <option value="0">No, COPY the file</option>
+        <option value="1">Yes, SYMLINK the file</option>
+    </param>
+</inputs>
+<outputs>
+    <data format="data" name="dest" />
+</outputs>
+<help>
+**What it does**
+
+This tool allows staff to import a file into Galaxy from NFS path.
+
+You will have to edit the imported file's metadata to assign it to the appropriate datatype.
+</help>
+</tool>