comparison create_or_update_organism.py @ 10:d72192ec8e39 draft

"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 08015be1ee8a784e0619f961aaa724857debfd6f"
author gga
date Mon, 02 Dec 2019 05:46:45 -0500
parents 496444ad9291
children 7609529caafa
comparison
equal deleted inserted replaced
9:29ce13734a5c 10:d72192ec8e39
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 from __future__ import print_function 2 from __future__ import print_function
3 3
4 import argparse 4 import argparse
5 import glob
5 import json 6 import json
6 import logging 7 import logging
7 import os 8 import os
8 import shutil 9 import shutil
10 import stat
9 import subprocess 11 import subprocess
10 import sys 12 import sys
13 import tarfile
11 import tempfile 14 import tempfile
12 import time 15 import time
13 16
14 17 from apollo import accessible_organisms
15 from webapollo import GuessOrg, OrgOrGuess, PermissionCheck, WAAuth, WebApolloInstance 18 from apollo.util import GuessOrg, OrgOrGuess
19
20 from arrow.apollo import get_apollo_instance
21
22 from webapollo import UserObj, handle_credentials
23
16 logging.basicConfig(level=logging.INFO) 24 logging.basicConfig(level=logging.INFO)
17 log = logging.getLogger(__name__) 25 log = logging.getLogger(__name__)
18 26
19 27
20 def IsBlatEnabled(): 28 def IsBlatEnabled():
25 return True 33 return True
26 else: 34 else:
27 return False 35 return False
28 36
29 37
38 def IsOrgCNSuffixEnabled():
39 if 'GALAXY_APOLLO_ORG_SUFFIX' not in os.environ:
40 return False
41 value = os.environ['GALAXY_APOLLO_ORG_SUFFIX'].lower()
42 if value in ('id', 'email'):
43 return value
44
45 return False
46
47
48 def IsRemote():
49 return 'GALAXY_SHARED_DIR' not in os.environ or len(os.environ['GALAXY_SHARED_DIR'].lower().strip()) == 0
50
51
30 if __name__ == '__main__': 52 if __name__ == '__main__':
31 parser = argparse.ArgumentParser(description='Create or update an organism in an Apollo instance') 53 parser = argparse.ArgumentParser(description='Create or update an organism in an Apollo instance')
32 WAAuth(parser) 54 parser.add_argument('jbrowse_src', help='Source JBrowse Data Directory')
33 parser.add_argument('jbrowse_src', help='Old JBrowse Data Directory') 55 parser.add_argument('jbrowse', help='Destination JBrowse Data Directory')
34 parser.add_argument('jbrowse', help='JBrowse Data Directory')
35 parser.add_argument('email', help='User Email') 56 parser.add_argument('email', help='User Email')
36 OrgOrGuess(parser) 57 OrgOrGuess(parser)
37 parser.add_argument('--genus', help='Organism Genus') 58 parser.add_argument('--genus', help='Organism Genus')
38 parser.add_argument('--species', help='Organism Species') 59 parser.add_argument('--species', help='Organism Species')
39 parser.add_argument('--public', action='store_true', help='Make organism public') 60 parser.add_argument('--public', action='store_true', help='Make organism public')
40 parser.add_argument('--group', help='Give access to a user group') 61 parser.add_argument('--group', help='Give access to a user group')
41 parser.add_argument('--remove_old_directory', action='store_true', help='Remove old directory') 62 parser.add_argument('--remove_old_directory', action='store_true', help='Remove old directory')
63 parser.add_argument('--no_reload_sequences', action='store_true', help='Disable update genome sequence')
64 parser.add_argument('--userid', help='User unique id')
42 args = parser.parse_args() 65 args = parser.parse_args()
43 CHUNK_SIZE = 2**20 66 CHUNK_SIZE = 2**20
44 blat_db = None 67 blat_db = None
45 68
69 path_fasta = args.jbrowse_src + '/seq/genome.fasta'
70
46 # Cleanup if existing 71 # Cleanup if existing
47 if(os.path.exists(args.jbrowse)): 72 if not IsRemote():
48 shutil.rmtree(args.jbrowse) 73 if(os.path.exists(args.jbrowse)):
49 # Copy files 74 shutil.rmtree(args.jbrowse)
50 shutil.copytree(args.jbrowse_src, args.jbrowse, symlinks=True) 75 # Copy files
51 76 shutil.copytree(args.jbrowse_src, args.jbrowse, symlinks=True)
52 path_fasta = args.jbrowse + '/seq/genome.fasta' 77
53 path_2bit = args.jbrowse + '/seq/genome.2bit' 78 path_2bit = args.jbrowse + '/seq/genome.2bit'
79 else:
80 twobittemp = tempfile.NamedTemporaryFile(prefix="genome.2bit")
81 path_2bit = twobittemp.name
82 os.chmod(path_2bit, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
54 83
55 # Convert fasta if existing 84 # Convert fasta if existing
56 if(IsBlatEnabled() and os.path.exists(path_fasta)): 85 if IsBlatEnabled() and os.path.exists(path_fasta):
57 arg = ['faToTwoBit', path_fasta, path_2bit] 86 arg = ['faToTwoBit', path_fasta, path_2bit]
58 tmp_stderr = tempfile.NamedTemporaryFile(prefix="tmp-twobit-converter-stderr") 87 proc = subprocess.Popen(args=arg, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
59 proc = subprocess.Popen(args=arg, shell=False, cwd=args.jbrowse, stderr=tmp_stderr.fileno()) 88 out, err = proc.communicate()
60 return_code = proc.wait() 89 if proc.returncode:
61 if return_code:
62 tmp_stderr.flush()
63 tmp_stderr.seek(0)
64 print("Error building index:", file=sys.stderr) 90 print("Error building index:", file=sys.stderr)
65 while True: 91 sys.stderr.write(err)
66 chunk = tmp_stderr.read(CHUNK_SIZE) 92 sys.exit(proc.returncode)
67 if not chunk: 93 if not IsRemote():
68 break 94 # No need to send this in remote mode, it will be in the archive
69 sys.stderr.write(chunk) 95 blat_db = path_2bit
70 sys.exit(return_code) 96
71 blat_db = path_2bit 97 wa = get_apollo_instance()
72 tmp_stderr.close() 98
73 99 # User must have an account, if not, create it
74 wa = WebApolloInstance(args.apollo, args.username, args.password) 100 gx_user = UserObj(**wa.users._assert_or_create_user(args.email))
101 handle_credentials(gx_user)
75 102
76 org_cn = GuessOrg(args, wa) 103 org_cn = GuessOrg(args, wa)
77 if isinstance(org_cn, list): 104 if isinstance(org_cn, list):
78 org_cn = org_cn[0] 105 org_cn = org_cn[0]
79 106
80 # User must have an account, if not, create it 107 if args.org_raw:
81 gx_user = wa.users.assertOrCreateUser(args.email) 108 suffix = IsOrgCNSuffixEnabled()
109 if suffix == 'id' and args.userid:
110 org_cn += ' (gx%s)' % args.userid
111 elif suffix == 'email':
112 org_cn += ' (%s)' % args.email
82 113
83 log.info("Determining if add or update required") 114 log.info("Determining if add or update required")
84 try: 115 try:
85 org = wa.organisms.findOrganismByCn(org_cn) 116 org = wa.organisms.show_organism(org_cn)
86 except Exception: 117 except Exception:
87 org = None 118 org = None
88 119
89 if org: 120 if org and 'error' not in org:
90 old_directory = org['directory'] 121 old_directory = org['directory']
91 122
92 if not PermissionCheck(gx_user, org_cn, "WRITE"): 123 all_orgs = wa.organisms.get_organisms()
93 print("Naming Conflict. You do not have permissions to access this organism. Either request permission from the owner, or choose a different name for your organism.") 124 if 'error' in all_orgs:
94 sys.exit(2) 125 all_orgs = []
126 all_orgs = [x['commonName'] for x in all_orgs]
127 if org_cn not in all_orgs:
128 raise Exception("Could not find organism %s" % org_cn)
129
130 orgs = accessible_organisms(gx_user, [org_cn], 'WRITE')
131 if not orgs:
132 raise Exception("Naming Conflict. You do not have write permission on this organism. Either request permission from the owner, or choose a different name for your organism.")
95 133
96 log.info("\tUpdating Organism") 134 log.info("\tUpdating Organism")
97 data = wa.organisms.updateOrganismInfo( 135 if IsRemote():
98 org['id'], 136 with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive:
99 org_cn, 137 with tarfile.open(archive.name, mode="w:gz") as tar:
100 args.jbrowse, 138 dataset_data_dir = args.jbrowse_src
101 # mandatory 139 for file in glob.glob(dataset_data_dir):
102 genus=args.genus, 140 tar.add(file, arcname=file.replace(dataset_data_dir, './'))
103 species=args.species, 141 if IsBlatEnabled():
104 public=args.public, 142 tar.add(path_2bit, arcname="./searchDatabaseData/genome.2bit")
105 blatdb=blat_db 143 data = wa.remote.update_organism(
106 ) 144 org['id'],
145 archive,
146 # mandatory
147 blatdb=blat_db,
148 genus=args.genus,
149 species=args.species,
150 public=args.public,
151 no_reload_sequences=args.no_reload_sequences
152 )
153 else:
154 data = wa.organisms.update_organism(
155 org['id'],
156 org_cn,
157 args.jbrowse,
158 # mandatory
159 genus=args.genus,
160 species=args.species,
161 public=args.public,
162 blatdb=blat_db,
163 no_reload_sequences=args.no_reload_sequences
164 )
107 time.sleep(2) 165 time.sleep(2)
108 if args.remove_old_directory and args.jbrowse != old_directory: 166
167 if not IsRemote() and args.remove_old_directory and args.jbrowse != old_directory:
109 shutil.rmtree(old_directory) 168 shutil.rmtree(old_directory)
110 169
111 data = [wa.organisms.findOrganismById(org['id'])] 170 data = wa.organisms.show_organism(org_cn)
112 171
113 else: 172 else:
114 # New organism 173 # New organism
115 log.info("\tAdding Organism") 174 log.info("\tAdding Organism")
116 data = wa.organisms.addOrganism( 175
117 org_cn, 176 if IsRemote():
118 args.jbrowse, 177 with tempfile.NamedTemporaryFile(suffix='.tar.gz') as archive:
119 genus=args.genus, 178 with tarfile.open(archive.name, mode="w:gz") as tar:
120 species=args.species, 179 dataset_data_dir = args.jbrowse_src
121 public=args.public, 180 for file in glob.glob(dataset_data_dir):
122 blatdb=blat_db 181 tar.add(file, arcname=file.replace(dataset_data_dir, './'))
123 ) 182 if IsBlatEnabled():
183 with tempfile.TemporaryDirectory() as empty_dir:
184 os.chmod(empty_dir, stat.S_IRUSR | stat.S_IXUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
185 tar.add(empty_dir, arcname="./searchDatabaseData/")
186 tar.add(path_2bit, arcname="./searchDatabaseData/genome.2bit")
187 data = wa.remote.add_organism(
188 org_cn,
189 archive,
190 blatdb=blat_db,
191 genus=args.genus,
192 species=args.species,
193 public=args.public,
194 metadata=None
195 )
196 if isinstance(data, list) and len(data) > 0:
197 data = data[0]
198 else:
199 data = wa.organisms.add_organism(
200 org_cn,
201 args.jbrowse,
202 blatdb=blat_db,
203 genus=args.genus,
204 species=args.species,
205 public=args.public,
206 metadata=None
207 )
124 208
125 # Must sleep before we're ready to handle 209 # Must sleep before we're ready to handle
126 time.sleep(2) 210 time.sleep(2)
127 log.info("Updating permissions for %s on %s", gx_user, org_cn) 211 log.info("Updating permissions for %s on %s", gx_user, org_cn)
128 wa.users.updateOrganismPermission( 212 wa.users.update_organism_permissions(
129 gx_user, org_cn, 213 gx_user.username,
214 org_cn,
130 write=True, 215 write=True,
131 export=True, 216 export=True,
132 read=True, 217 read=True,
133 ) 218 )
134 219
135 # Group access 220 # Group access
136 if args.group: 221 if args.group:
137 group = wa.groups.loadGroupByName(name=args.group) 222 group = wa.groups.get_groups(name=args.group)[0]
138 res = wa.groups.updateOrganismPermission(group, org_cn, 223 res = wa.groups.update_organism_permissions(group['name'], org_cn,
139 administrate=False, write=True, read=True, 224 administrate=False, write=True, read=True,
140 export=True) 225 export=True)
141 226
142 data = [o for o in data if o['commonName'] == org_cn]
143 print(json.dumps(data, indent=2)) 227 print(json.dumps(data, indent=2))