annotate versioned_data_cache_clear.py @ 2:269d246ce6d0 draft default tip

Uploaded
author damion
date Fri, 23 Oct 2015 17:53:29 -0400
parents 5c5027485f7d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
1 #!/usr/bin/python
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
2
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
3 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
4 ****************************** versioned_data_cache_clear.py ******************************
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
5 Call this script directly to clear out all but the latest galaxy Versioned Data data library
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
6 and server data store cached folder versions.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
7
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
8 SUGGEST RUNNING THIS UNDER GALAXY OR LESS PRIVILEGED USER, BUT the versioneddata_api_key file does need to be readable by the user.
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
9
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
10 """
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
11 import vdb_retrieval
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
12 import vdb_common
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
13 import glob
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
14 import os
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
15
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
16 # Note that globals from vdb_retrieval can be referenced by prefixing with vdb_retrieval.XYZ
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
17 # Note that this script uses the admin_api established in vdb_retrieval.py
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
18
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
# Accumulators filled in while the data stores are scanned further down.
library_dataset_deletes = []
library_folder_deletes = []
# Stack of Versioned Data library dataset_ids. A workflow cache folder whose
# key name is made up only of ids found here can be saved; otherwise the
# folder is removed.
workflow_keepers = []

# This script works through the admin_api established in vdb_retrieval.py;
# the same connection is also installed as the "user" api before the data
# stores are loaded.
retrieval_obj = vdb_retrieval.VDBRetrieval()
retrieval_obj.set_admin_api()
retrieval_obj.user_api = retrieval_obj.admin_api
retrieval_obj.set_datastores()
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
27
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
# Cycle through datastores, listing subfolders under each, sorted.
# For every caching data store:
#   1. queue all but the latest Galaxy library version folder (and its
#      datasets) for deletion, remembering the dataset ids of the latest one;
#   2. permanently delete all but the latest version folder found under the
#      data store's server path.
for data_store in retrieval_obj.data_stores:
    spec_file_id = data_store['id']

    # STEP 1: Determine data store type and location
    data_store_spec = retrieval_obj.admin_api.libraries.show_folder(retrieval_obj.library_id, spec_file_id)
    data_store_type = retrieval_obj.test_data_store_type(data_store_spec['name'])

    # Folders are static - they don't do caching. Compare against whole type
    # names: the previous "in 'folder biomaj'" was a substring test, so any
    # fragment of that string (even '') was treated as a static store.
    if data_store_type in ('folder', 'biomaj'):
        continue

    ds_obj = retrieval_obj.get_data_store_gateway(data_store_type, spec_file_id)

    print("")

    # Cycle through library tree; have to look at the whole thing since
    # there's no /[string]/* wildcard search:
    folders = retrieval_obj.get_library_folders(ds_obj.library_label_path)
    newest_ptr = len(folders) - 1
    for ptr, folder in enumerate(folders):

        # Ignore folder that represents data store itself:
        if ptr == 0:
            print('Data Store ::' + folder['name'])

        # Keep most recent cache item. Only the dataset ids are recorded,
        # because workflow cache folder names are later matched against
        # id strings, not against the file records themselves.
        elif ptr == newest_ptr:
            print('Cached Version ::' + folder['name'])
            workflow_keepers.extend(item['id'] for item in folder['files'])

        # Drop version caches that are further in the past. The folder
        # record itself is queued (append, not extend): the delete step
        # reads folder['id'], and extend() on the id string would have
        # queued its individual characters instead.
        else:
            print('Clearing version cache:' + folder['name'])
            library_folder_deletes.append(folder)
            library_dataset_deletes.extend(folder['files'])

    # Now auto-clean versioned/ folders too?
    print("Server loc: " + ds_obj.data_store_path)

    # Newest version first, so the first qualifying directory is the keeper.
    items = sorted(os.listdir(ds_obj.data_store_path), key=vdb_common.natural_sort_key, reverse=True)
    count = 0
    for name in items:

        # Only consider real directories that are neither the master nor a
        # symlinked "current" one.
        version_folder = os.path.join(ds_obj.data_store_path, name)
        if name == 'master' \
            or not os.path.isdir(version_folder) \
            or os.path.islink(version_folder):
            continue

        count += 1
        if count == 1:
            print("Keeping cache:" + name)
            continue

        print("Dropping cache:" + name)
        for root2, dirs2, files2 in os.walk(version_folder):
            for version_file in files2:
                full_path = os.path.join(root2, version_file)
                print("Removing " + full_path)
                os.remove(full_path)

        # Not expecting any subfolders here: os.rmdir only removes an empty
        # directory and raises OSError if anything is left behind.
        os.rmdir(version_folder)
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
91
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
92
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
admin_api = retrieval_obj.admin_api

# Permanently delete (purged=True) each data library dataset queued above.
for dataset in library_dataset_deletes:
    admin_api.libraries.delete_library_dataset(retrieval_obj.library_id, dataset['id'], purged=True)


# Galaxy library folders are removed through the newer Bioblend "folders"
# client, and only when the connection object exposes one.
# OLD Galaxy way possible: http DELETE request to {{url}}/api/folders/{{encoded_folder_id}}?key={{key}}
# NOTE(review): confirm the installed Bioblend folders client really offers delete().
if 'folders' in dir(admin_api):
    for stale_folder in library_folder_deletes:
        admin_api.folders.delete(stale_folder['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
103
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
104
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
# Workflow cache cleanup: a workflow cache folder is kept only when every
# input dataset id encoded in its name is listed in workflow_keepers;
# otherwise its datasets are purged and the folder itself is deleted.
print(workflow_keepers)

workflow_cache_folders = retrieval_obj.get_library_folders('/' + vdb_retrieval.VDB_WORKFLOW_CACHE_FOLDER_NAME + '/')

# Folder deletion needs the newer Bioblend "folders" client; check once.
can_delete_folders = 'folders' in dir(retrieval_obj.admin_api)

for folder in workflow_cache_folders:
    # Folder name holds the input dataset ids separated by underscore.
    dataset_ids = folder['name'].split('_')

    # If every input dataset in workflow cache exists in library cache, then keep it.
    if all(dataset_id in workflow_keepers for dataset_id in dataset_ids):
        continue

    # We have one or more cached datasets to drop.
    print("Dropping workflow cache: " + folder['name'])
    for cached_file in folder['files']:
        dataset_id = cached_file['id']
        print(dataset_id)
        retrieval_obj.admin_api.libraries.delete_library_dataset(retrieval_obj.library_id, dataset_id, purged=True)

    # Now delete the workflow folder itself.
    if can_delete_folders:
        retrieval_obj.admin_api.folders.delete(folder['id'])
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
129
5c5027485f7d Uploaded correct file
damion
parents:
diff changeset
130