view data_nfs/galaxy_import.pl @ 0:9974ff5df008 default tip

Uploaded
author edward-kirton
date Thu, 01 Dec 2011 20:41:36 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl

## NB: THIS TOOL ASSUMES EMAIL ADDRESSES ARE GALAXY LOGINS
##     MODIFICATIONS WILL BE REQUIRED IF YOU USE LDAP AUTHENTICATION

use warnings;
use strict;
use Cwd qw/abs_path/;
use Env qw/EMAIL/;
use Getopt::Long;

## SETTINGS
# Site-specific configuration; adjust both values for your installation.
#   $queue_file  : the file that queued "email<TAB>path" records are appended to
#   $admin_email : contact address shown to users when the queue file is missing
my ($queue_file, $admin_email) = (
    '/some/world/accessible/nfs/import_queue.txt',
    'admin@yoursite.gov',
);

## HELP
# Help text printed when --help is given or no paths are supplied.
# The heredoc is single-quoted on purpose: $EMAIL and @ARGV below are meant to
# appear literally in the output, not be interpolated.
my $usage=<<'ENDHERE';
NAME:
    galaxy_import.pl
PURPOSE:
    To queue files to copy to Galaxy user's FTP folder for subsequent import into Galaxy.
INPUT:
    @ARGV : one or more paths to copy (by default, skip folders; see -r)
    -r|recurse : recursively copy folders
    -e|email <email> : your email is your Galaxy user ID; only required if $EMAIL is not defined
        NB: email is case sensitive, must be same as Galaxy login.
    -dev : copy to Galaxy/Dev instead of Galaxy/JGI (for developers only)
    -h|help : print this message and exit
OUTPUT:
    stderr : error/warning messages
NOTES:
    - ONLY *WORLD* READABLE PATHS CAN BE COPIED
    - user's environment variable, $EMAIL, may be set to make the --email option unnecessary
    - the copied files must be preprocessed by the galaxy copy daemon, which runs every 2min
    - you must import the files into Galaxy within 1 month or they will be purged!
ENDHERE

## OPTIONS
# Parse command-line switches; whatever is left in @ARGV afterwards is treated
# as the list of paths to queue.
# NOTE(review): $dev is parsed here but is not referenced anywhere else in the
# visible script -- confirm whether Dev-instance routing still has to be wired up.
my ($help,$email,$recurse,$dev);
# GetOptions returns false (after printing its own diagnostics to stderr) when
# it meets an unknown or malformed option; stop instead of queueing files
# under options the user did not intend.
GetOptions(
    'help' => \$help,
    'email=s' => \$email,
    'recurse' => \$recurse,
    'dev' => \$dev
) or die("Invalid option(s); run with --help for usage\n");

## VALIDATE INPUT
# Print the help text and stop when it was requested or there is nothing to queue.
if ($help or !@ARGV) {
    print $usage;
    exit;
}

# --email takes precedence; otherwise fall back to the EMAIL environment
# variable (imported as $EMAIL by "use Env").
$email ||= $EMAIL;
die("Either --email or environment variable \$EMAIL is required\n") unless $email;

# Sanity check only, not full RFC validation:
#   - one or more dot-separated domain labels after the first, so addresses
#     such as user@mail.site.gov are accepted;
#   - hyphens allowed in domain labels (user@my-site.org);
#   - \A...\z instead of ^...$ so a trailing newline (which would split the
#     tab-separated queue record in two) is rejected.
die("This doesn't look like a valid email: $email\n")
    unless $email =~ /\A[^\s@]+@[\w-]+(?:\.[\w-]+)+\z/;

# The queue file is never created by this script; if it is missing, the user
# is told to contact the administrator.
die("Queue file, $queue_file, does not exist; please notify Galaxy administrator at $admin_email\n")
    unless -f $queue_file;

# VALIDATE PATHS
# Build the set of unique, canonical paths to queue.  A rejected argument only
# produces a warning on stderr, so one bad path does not stop the others from
# being queued.
my %paths=();
foreach my $arg (@ARGV) {
    # abs_path() returns undef when the path cannot be resolved (for example
    # when a parent directory does not exist); report the argument exactly as
    # the user typed it instead of an empty, "uninitialized" value.
    my $path = abs_path($arg);
    unless (defined $path) {
        warn("Invalid path, $arg\n");
        next;
    }

    # Only regular files and directories can be queued.
    my $type;
    if (-f $path) {
        $type = 'file';
    } elsif (-d $path) {
        $type = 'folder';
    } else {
        warn("Invalid path, $path\n");
        next;
    }

    # Folders are opt-in via -r.
    if ($type eq 'folder' and !$recurse) {
        warn("Skipping folder, $path; use -r to process folders\n");
        next;
    }

    # Queue records are written as one tab-separated line per path, so a path
    # containing a tab or newline cannot be represented without corrupting
    # the queue file.
    if ($path =~ /[\t\n]/) {
        warn("Skipping path containing a tab or newline, $path\n");
        next;
    }

    # Different arguments can resolve to the same canonical path.
    if (exists($paths{$path})) {
        warn("Skipping duplicate $type, $path\n");
        next;
    }
    $paths{$path}=1;
}

# APPEND QUEUE FILE
# Append one "email<TAB>path" record per validated path to the queue file.
# Paths are sorted so the order of the records is deterministic.
my @paths=sort keys %paths;
exit unless @paths;

# Three-argument open with a lexical handle: the file name is never parsed for
# mode characters and the handle is scoped to this script section.
open(my $queue_fh, '>>', $queue_file)
    or die("Unable to open queue file, $queue_file, for appending: $!\n");
foreach my $path (@paths) {
    print {$queue_fh} join("\t", $email, $path),"\n"
        or die("Unable to write to queue file, $queue_file: $!\n");
}
# Output is buffered, so write failures (full disk, quota, NFS errors) may only
# be reported when the handle is closed; do not treat the request as queued
# unless close succeeds.
close($queue_fh)
    or die("Unable to finish writing queue file, $queue_file: $!\n");
exit;
__END__
Copyright(c) 2011 US DOE Joint Genome Institute.
Use freely under the same license as Galaxy itself.