annotate data_nfs/galaxy_import.pl @ 0:9974ff5df008 default tip

Uploaded
author edward-kirton
date Thu, 01 Dec 2011 20:41:36 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9974ff5df008 Uploaded
edward-kirton
parents:
diff changeset
#!/usr/bin/perl

## NB: THIS TOOL ASSUMES EMAIL ADDRESSES ARE GALAXY LOGINS
## MODIFICATIONS WILL BE REQUIRED IF YOU USE LDAP AUTHENTICATION

## Appends one "email<TAB>path" record per requested path to a shared queue
## file. This script copies nothing itself; per the usage text below, a
## separate Galaxy copy daemon is expected to process the queue.

use warnings;
use strict;
use Cwd qw/abs_path/;
use Env qw/EMAIL/;
use Getopt::Long;

## SETTINGS
# Queue file this tool appends to; it must already exist (checked below).
my $queue_file = '/some/world/accessible/nfs/import_queue.txt';
# Contact address shown to the user when the queue file is missing.
my $admin_email = 'admin@yoursite.gov';

## HELP
# Non-interpolating heredoc: $EMAIL and @ARGV below are printed literally.
my $usage=<<'ENDHERE';
NAME:
galaxy_import.pl
PURPOSE:
To queue files to copy to Galaxy user's FTP folder for subsequent import into Galaxy.
INPUT:
@ARGV : one or more paths to copy (by default, skip folders; see -r)
-r|recurse : recursively copy folders
-e|email <email> : your email is your Galaxy user ID; only required if $EMAIL is not defined
NB: email is case sensitive, must be same as Galaxy login.
OUTPUT:
stderr : error/warning messages
-dev : copy to Galaxy/Dev instead of Galaxy/JGI (for developers only)
NOTES:
- ONLY *WORLD* READABLE PATHS CAN BE COPIED
- user's environment variable, $EMAIL, may be set to make the --email option unnecessary
- the copied files must be preprocessed by the galaxy copy daemon, which runs every 2min
- you must import the files into Galaxy within 1 month or they will be purged!
ENDHERE

## OPTIONS
# NOTE(review): --dev is accepted but $dev is never read anywhere in this
# script, so it currently has no effect -- confirm the intended dev queue.
my ($help,$email,$recurse,$dev);
GetOptions(
    'help' => \$help,
    'email=s' => \$email,
    'recurse' => \$recurse,
    'dev' => \$dev
) or die("Invalid option(s); run with --help for usage\n");

## VALIDATE INPUT
if (!@ARGV or $help) {
    print $usage;
    exit;
}

# --email takes precedence; otherwise fall back to the $EMAIL environment variable.
$email = $EMAIL unless $email;
die("Either --email or environment variable \$EMAIL is required\n") unless $email;

# Accept multi-label and hyphenated domains (e.g. user@jgi.doe.gov).
# \A/\z rather than ^/$ so that a trailing newline, which would split the
# queue record in two, is rejected.
die("This doesn't look like a valid email: $email\n")
    unless $email =~ /\A\S+@[\w-]+(?:\.[\w-]+)+\z/;
die("Queue file, $queue_file, does not exist; please notify Galaxy administrator at $admin_email\n") unless -f $queue_file;

# VALIDATE PATHS
# NOTE(review): the usage text says only world-readable paths can be copied,
# but readability is not checked here.
my %paths=();
foreach my $arg (@ARGV) {
    # Keep the user's argument so it can still be reported when it cannot
    # be resolved (abs_path may return undef or an empty string).
    my $path = abs_path($arg);
    unless (defined($path) and length($path)) {
        warn("Invalid path, $arg\n");
        next;
    }

    # Records are written tab-separated, one per line; a tab or newline in
    # the path would corrupt the record or inject an extra one.
    if ($path =~ /[\t\n]/) {
        warn("Skipping path containing a tab or newline, $path\n");
        next;
    }

    my $type;
    if (-f $path) {
        $type = 'file';
    } elsif (-d $path) {
        unless ($recurse) {
            warn("Skipping folder, $path; use -r to process folders\n");
            next;
        }
        $type = 'folder';
    } else {
        warn("Invalid path, $path\n");
        next;
    }

    # Different arguments can resolve to the same absolute path; queue each once.
    if (exists($paths{$path})) {
        warn("Skipping duplicate $type, $path\n");
    } else {
        $paths{$path}=1;
    }
}

# APPEND QUEUE FILE
# Sorted so the records are written in a deterministic order.
my @paths=sort keys %paths;
exit unless @paths;
open(my $queue_fh, '>>', $queue_file)
    or die("Unable to open queue file, $queue_file, for appending: $!\n");
foreach my $path (@paths) {
    my $record = join("\t", $email, $path) . "\n";
    print {$queue_fh} $record
        or die("Unable to write to queue file, $queue_file: $!\n");
}
# Buffered write errors only surface at close, so check it.
close($queue_fh)
    or die("Unable to close queue file, $queue_file: $!\n");
exit;
__END__
Copyright(c) 2011 US DOE Joint Genome Institute.
Use freely under the same license as Galaxy itself.