comparison data_nfs/galaxy_import.pl @ 0:9974ff5df008 default tip

Uploaded
author edward-kirton
date Thu, 01 Dec 2011 20:41:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9974ff5df008
1 #!/usr/bin/perl
2
3 ## NB: THIS TOOL ASSUMES EMAIL ADDRESSES ARE GALAXY LOGINS
4 ## MODIFICATIONS WILL BE REQUIRED IF YOU USE LDAP AUTHENTICATION
5
6 use warnings;
7 use strict;
8 use Cwd qw/abs_path/;
9 use Env qw/EMAIL/;
10 use Getopt::Long;
11
## SETTINGS
# Queue file that this script appends "email<TAB>path" records to.
my $queue_file = '/some/world/accessible/nfs/import_queue.txt';
# Contact address quoted in the error shown when the queue file is missing.
my $admin_email = 'admin@yoursite.gov';

## HELP
# Text printed for --help or when no paths are given.  The heredoc is
# single-quoted so that $EMAIL and @ARGV are printed literally.
# All switches are listed under INPUT (-dev used to sit under OUTPUT) and
# the accepted -h|help switch is documented.
my $usage=<<'ENDHERE';
NAME:
galaxy_import.pl
PURPOSE:
To queue files to copy to Galaxy user's FTP folder for subsequent import into Galaxy.
INPUT:
@ARGV : one or more paths to copy (by default, skip folders; see -r)
-r|recurse : recursively copy folders
-e|email <email> : your email is your Galaxy user ID; only required if $EMAIL is not defined
NB: email is case sensitive, must be same as Galaxy login.
-dev : copy to Galaxy/Dev instead of Galaxy/JGI (for developers only)
-h|help : print this message and exit
OUTPUT:
stderr : error/warning messages
NOTES:
- ONLY *WORLD* READABLE PATHS CAN BE COPIED
- user's environment variable, $EMAIL, may be set to make the --email option unnecessary
- the copied files must be preprocessed by the galaxy copy daemon, which runs every 2min
- you must import the files into Galaxy within 1 month or they will be purged!
ENDHERE
36
## OPTIONS
# $help    : print the usage text and exit
# $email   : Galaxy login to queue the paths under
# $recurse : allow folders to be queued
# $dev     : parsed for the documented -dev switch; NOTE(review): nothing in
#            the visible script reads it, so the switch currently has no effect
my ($help, $email, $recurse, $dev);

# GetOptions warns on stderr and returns false for unknown or malformed
# switches.  Stop in that case: carrying on after e.g. a mistyped --email
# would drop the address and queue the paths under a different login.
GetOptions(
    'help'    => \$help,
    'email=s' => \$email,
    'recurse' => \$recurse,
    'dev'     => \$dev,
) or die("Invalid command-line options; run with --help for usage\n");
45
## VALIDATE INPUT
# Show the help text when it is requested or when there is nothing to queue.
if ($help or !@ARGV) {
    print $usage;
    exit;
}

# An explicit --email wins; otherwise fall back to the $EMAIL environment
# variable (imported through `use Env`).
$email ||= $EMAIL;
die("Either --email or environment variable \$EMAIL is required\n") unless $email;

# Shape check only: something without whitespace, an "@", then a host made of
# two or more dot-separated labels.  Labels may contain "-" and there may be
# more than two of them, so addresses such as user@mail.example-lab.gov pass.
# \A and \z (instead of ^ and $) also refuse a trailing newline, which would
# otherwise end up inside the line-oriented queue record.
die("This doesn't look like a valid email: $email\n")
    unless $email =~ /\A\S+\@[\w-]+(?:\.[\w-]+)+\z/;

# The script only ever appends to the queue file; it must already exist.
die("Queue file, $queue_file, does not exist; please notify Galaxy administrator at $admin_email\n")
    unless -f $queue_file;
60
# VALIDATE PATHS
# Collect the absolute paths to queue.  Keys are the accepted paths (regular
# files, plus folders when -r was given); the value is always 1.
my %paths = ();
foreach my $arg (@ARGV) {
    # Resolve into a separate variable: the loop variable aliases @ARGV, and
    # abs_path() returns undef when the path cannot be resolved.
    my $path = abs_path($arg);
    if (!defined $path) {
        warn("Invalid path, $arg\n");
        next;
    }

    # Queue records are written as "email<TAB>path<NEWLINE>", so a path that
    # contains either separator cannot be represented.
    if ($path =~ /[\t\r\n]/) {
        warn("Skipping path containing a tab or newline, $path\n");
        next;
    }

    my $kind;
    if (-f $path) {
        $kind = 'file';
    }
    elsif (-d $path) {
        if (!$recurse) {
            warn("Skipping folder, $path; use -r to process folders\n");
            next;
        }
        $kind = 'folder';
    }
    else {
        warn("Invalid path, $path\n");
        next;
    }

    # Different arguments can resolve to the same absolute path.
    if (exists($paths{$path})) {
        warn("Skipping duplicate $kind, $path\n");
        next;
    }
    $paths{$path} = 1;
}
85
# APPEND QUEUE FILE
# Sorted so that the order of the queued records does not depend on hash order.
my @paths = sort keys %paths;
exit unless @paths;

# One "email<TAB>path" record per line.
my $records = join('', map { join("\t", $email, $_) . "\n" } @paths);

# Three-argument open with a lexical handle: the file name is never parsed
# for mode characters and the handle cannot clash with another QUEUE.
open(my $queue_fh, '>>', $queue_file)
    or die("Unable to open queue file, $queue_file, for appending: $!\n");
print {$queue_fh} $records
    or die("Unable to write to queue file, $queue_file: $!\n");
# Output is buffered, so a failed write may only be reported by close().
close($queue_fh)
    or die("Unable to finish writing queue file, $queue_file: $!\n");
exit;
96 __END__
97 Copyright(c) 2011 US DOE Joint Genome Institute.
98 Use freely under the same license as Galaxy itself.