Mercurial > repos > shellac > sam_consensus_v3
comparison env/lib/python3.9/site-packages/galaxy/tool_util/deps/mulled/mulled_search.py @ 0:4f3585e2f14b draft default tip
"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author | shellac |
---|---|
date | Mon, 22 Mar 2021 18:12:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4f3585e2f14b |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import json | |
5 import logging | |
6 import sys | |
7 import tempfile | |
8 | |
9 import requests | |
10 | |
11 from .mulled_list import get_singularity_containers | |
12 from .util import build_target, v2_image_name | |
13 | |
14 try: | |
15 from conda.cli.python_api import run_command | |
16 except ImportError: | |
17 run_command = None # type: ignore | |
18 | |
19 try: | |
20 from whoosh.fields import Schema | |
21 from whoosh.fields import TEXT | |
22 from whoosh.fields import STORED | |
23 from whoosh.index import create_in | |
24 from whoosh.qparser import QueryParser | |
25 except ImportError: | |
26 Schema = TEXT = STORED = create_in = QueryParser = None # type: ignore | |
27 | |
28 QUAY_API_URL = 'https://quay.io/api/v1/repository' | |
29 | |
30 | |
class QuaySearch():
    """
    Tool to search within a quay organization for a given software name.

    build_index() must be called before search_repository(): it downloads
    the repository listing of the organization once and stores it in a
    local whoosh index that the searches then query.
    """

    def __init__(self, organization):
        # whoosh index; stays None until build_index() has run
        self.index = None
        self.organization = organization

    def build_index(self):
        """
        Create an index to quickly examine the repositories of a given quay.io organization.
        """
        # download all information about the repositories from the
        # given organization in self.organization
        parameters = {'public': 'true', 'namespace': self.organization}
        r = requests.get(
            QUAY_API_URL,
            headers={'Accept-encoding': 'gzip'},
            params=parameters,
            timeout=12,
        )
        # NOTE: the directory backing the index is never removed by this class.
        tmp_dir = tempfile.mkdtemp()
        schema = Schema(title=TEXT(stored=True), content=STORED)
        self.index = create_in(tmp_dir, schema)

        decoded_request = json.loads(r.text)
        writer = self.index.writer()
        for repository in decoded_request['repositories']:
            writer.add_document(
                title=repository['name'], content=repository['description'])
        writer.commit()

    def search_repository(self, search_string, non_strict):
        """
        Search Docker containers on quay.io.

        Returns a list of ``{'package': ..., 'version': ...}`` dicts, one
        per tag of every repository whose title matches ``search_string``.
        When ``non_strict`` is true, up to two spelling suggestions for the
        search string are searched as well (as ``*suggestion*`` wildcards).
        """
        # with statement closes searcher after usage.
        with self.index.searcher() as searcher:
            query = QueryParser(
                "title", self.index.schema).parse(search_string)
            results = searcher.search(query)
            if non_strict:
                # look for spelling errors and use suggestions as a search term too
                corrector = searcher.corrector("title")
                suggestions = corrector.suggest(search_string, limit=2)

                # get all repositories with suggested keywords
                for suggestion in suggestions:
                    search_string = "*%s*" % suggestion
                    query = QueryParser(
                        "title", self.index.schema).parse(search_string)
                    results.extend(searcher.search(query))

            # The hits are consumed before the searcher is closed;
            # presumably whoosh needs it open to load stored fields.
            out = []
            for result in results:
                title = result['title']
                for version in self.get_additional_repository_information(title):
                    out.append({'package': title, 'version': version})

            return out

    def get_additional_repository_information(self, repository_string):
        """
        Function downloads additional information from quay.io to
        get the tag-field which includes the version number.
        """
        url = f"{QUAY_API_URL}/{self.organization}/{repository_string}"
        # Same timeout as build_index(): without one a stalled connection
        # would block the whole search indefinitely.
        r = requests.get(url, headers={'Accept-encoding': 'gzip'}, timeout=12)

        decoded_request = json.loads(r.text)
        return decoded_request['tags']
106 | |
107 | |
class CondaSearch():
    """
    Tool to search the bioconda channel
    """

    def __init__(self, channel):
        self.channel = channel

    @staticmethod
    def _parse_search_output(raw_out):
        """Convert the text printed by ``conda search`` into a list of package dicts."""
        parsed = []
        # The first two lines and the trailing element after the final
        # newline are dropped (presumably a status line and the column
        # header -- confirm against the conda version in use).
        for line in raw_out.split('\n')[2:-1]:
            fields = line.split()
            if len(fields) < 3:
                # blank or truncated line: skip it instead of raising IndexError
                continue
            parsed.append(
                {'package': fields[0], 'version': fields[1], 'build': fields[2]})
        return parsed

    def get_json(self, search_string):
        """
        Function takes search_string variable and returns results from the bioconda channel in JSON format

        Returns a list of ``{'package', 'version', 'build'}`` dicts, or an
        empty list when the conda search exits with a non-zero code.
        Raises Exception when the conda Python API could not be imported.
        """
        if run_command is None:
            raise Exception("Invalid search destination. " + deps_error_message("conda"))
        raw_out, err, exit_code = run_command(
            'search', '-c',
            self.channel,
            search_string,
            use_exception_handler=True)
        if exit_code != 0:
            logging.info('Search failed with: %s', err)
            return []
        return self._parse_search_output(raw_out)
132 | |
133 | |
class GitHubSearch():
    """
    Tool to search the GitHub bioconda-recipes repo
    """

    def get_json(self, search_string):
        """
        Takes search_string variable and return results from the bioconda-recipes github repository in JSON format
        """
        # a timeout keeps a stalled connection from blocking the search forever
        response = requests.get(
            "https://api.github.com/search/code?q=%s+in:path+repo:bioconda/bioconda-recipes+path:recipes" % search_string,
            timeout=12).json()
        return response

    def process_json(self, json, search_string):
        """
        Take JSON input and process it, returning the required data

        Returns the ``name`` and ``path`` of at most the first ten entries
        of ``json['items']``. A payload without ``items`` (for example an
        API error message) yields an empty list. ``search_string`` is
        accepted but not used.
        """
        items = (json.get('items') or [])[:10]  # get top ten results
        return [{'name': item['name'], 'path': item['path']} for item in items]

    def recipe_present(self, search_string):
        """
        Check if a recipe exists in bioconda-recipes which matches search_string exactly
        """
        response = requests.get(
            "https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/%s" % search_string,
            timeout=12)
        return response.status_code == 200
167 | |
168 | |
def get_package_hash(packages, versions):
    """
    Take packages and versions (if the latter are given) and returns a hash for each. Also checks quay.io to see if the container is already present.

    The returned dict always holds ``package_hash`` and
    ``container_present``. When ``versions`` is non-empty it also holds
    ``version_hash`` and, if the repository exists,
    ``container_present_with_version``.
    """
    hash_results = {}
    if versions:
        targets = [build_target(p, version=versions[p]) for p in packages]
    else:  # if versions are not given only calculate the package hash
        targets = [build_target(p) for p in packages]
    # make the hash from the processed targets
    package_hash = v2_image_name(targets)
    hash_results['package_hash'] = package_hash.split(':')[0]
    if versions:
        hash_results['version_hash'] = package_hash.split(':')[1]

    r = requests.get(
        "https://quay.io/api/v1/repository/biocontainers/%s" % hash_results['package_hash'],
        timeout=12)
    if r.status_code == 200:
        hash_results['container_present'] = True
        if versions:  # now test if the version hash is listed in the repository tags
            # Remove the build suffix (-0, -1, ... and also -10 and above)
            # from the end of each tag; slicing off two characters would
            # mis-handle multi-digit build numbers.
            tags = [n.rsplit('-', 1)[0] for n in r.json()['tags']]
            hash_results['container_present_with_version'] = hash_results['version_hash'] in tags
    else:
        hash_results['container_present'] = False
    return hash_results
200 | |
201 | |
def singularity_search(search_string):
    """
    Look up singularity containers whose ``name:version`` entry contains search_string.

    Returns a list of ``{'package': name, 'version': version}`` dicts, one
    per matching entry from get_singularity_containers().
    """
    matches = []
    for entry in get_singularity_containers():
        if search_string not in entry:
            continue
        parts = entry.split(':')
        matches.append({'package': parts[0], 'version': parts[1]})
    return matches
217 | |
218 | |
def readable_output(json, organization='biocontainers', channel='bioconda'):
    """
    Write the search results to stdout as aligned plain-text tables.

    ``json`` is the result dict built by main(): destination name ->
    {search term -> list of results}, plus an optional
    ``github_recipe_present`` entry holding a ``recipes`` list. (The
    parameter shadows the ``json`` module inside this function.)
    ``organization`` and ``channel`` are only used to build the suggested
    ``docker pull`` / ``conda install`` commands.

    Up to three sections are printed, separated by a blank line:
    quay/conda/singularity hits, exact recipe matches, and other GitHub
    hits. A section with no entries is omitted entirely.
    """

    def write_table(rows):
        # Pad every column except the last to its widest cell plus two
        # spaces; the last cell of each row already ends with a newline.
        padded = len(rows[0]) - 1
        widths = [max(len(row[n]) for row in rows) + 2 for n in range(padded)]
        for row in rows:
            cells = [row[n].ljust(widths[n]) for n in range(padded)]
            cells.append(row[-1])
            sys.stdout.write("".join(cells))

    # if json is empty:
    if sum(len(json[destination][results]) for destination in json for results in json[destination]) == 0:
        sys.stdout.write('No results found for that query.\n')
        return

    # set once any section has been written, so later ones start with a blank line
    section_written = False

    # return results for quay, conda and singularity together
    rows = []
    for results in json.get('quay', {}).values():
        for result in results:
            # the pull command is only a suggestion built from the names
            rows.append(['quay', result['package'], result['version'],
                         'docker pull quay.io/%s/%s:%s\n' % (organization, result['package'], result['version'])])
    for results in json.get('conda', {}).values():
        for result in results:
            rows.append(['conda', result['package'], '{}--{}'.format(result['version'], result['build']),
                         'conda install -c {} {}={}={}\n'.format(channel, result['package'], result['version'], result['build'])])
    for results in json.get('singularity', {}).values():
        for result in results:
            rows.append(['singularity', result['package'], result['version'],
                         'wget https://depot.galaxyproject.org/singularity/{}:{}\n'.format(result['package'], result['version'])])

    if rows:
        sys.stdout.write("The query returned the following result(s).\n")
        write_table([['LOCATION', 'NAME', 'VERSION', 'COMMAND\n']] + rows)
        section_written = True

    # Only announce exact recipe matches when there are any: the entry
    # itself is a non-empty dict even if its 'recipes' list is empty.
    recipes = (json.get('github_recipe_present') or {}).get('recipes', [])
    if recipes:
        if section_written:
            sys.stdout.write('\n')
        sys.stdout.write(
            'The following recipes were found in the bioconda-recipes repository which exactly matched one of the search terms:\n')
        recipe_rows = [
            [recipe, "https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/%s\n" % recipe]
            for recipe in recipes
        ]
        write_table([['QUERY', 'LOCATION\n']] + recipe_rows)
        section_written = True

    github_rows = []
    for search_string, results in json.get('github', {}).items():
        for result in results:
            github_rows.append([search_string, result['name'],
                                'https://github.com/bioconda/bioconda-recipes/tree/master/%s\n' % result['path']])

    if github_rows:
        if section_written:
            sys.stdout.write('\n')
        sys.stdout.write(
            "Other result(s) on the bioconda-recipes GitHub repository:\n")
        write_table([['QUERY', 'FILE', 'URL\n']] + github_rows)
284 | |
285 | |
def deps_error_message(package):
    """Return the hint shown when the optional dependency ``package`` is not installed."""
    return f"Required dependency [{package}] is not installed. Run 'pip install galaxy-tool-util[mulled]'."
288 | |
289 | |
def main(argv=None):
    """
    Command-line entry point: search the selected destinations and print the results.

    ``argv`` is the list of command-line arguments to parse; when it is
    None argparse falls back to ``sys.argv[1:]``. Results are printed as
    JSON with ``--json``, otherwise as tables via readable_output().
    Returns None; if Whoosh is not installed only the dependency hint is
    written.
    """
    if Schema is None:
        sys.stdout.write(deps_error_message("Whoosh"))
        return

    # conda is only searched by default when its Python API is importable
    destination_defaults = ['quay', 'singularity', 'github']
    if run_command is not None:
        destination_defaults.append('conda')

    parser = argparse.ArgumentParser(
        description='Searches in a given quay organization for a repository')
    parser.add_argument('-d', '--destination', dest='search_dest', nargs='+', default=destination_defaults,
                        help="Choose where to search. Options are 'conda', 'quay', 'singularity' and 'github'. If no option are given, all will be searched.")
    parser.add_argument('-o', '--organization', dest='organization_string', default="biocontainers",
                        help='Change quay organization to search; default is biocontainers.')
    parser.add_argument('-c', '--channel', dest='channel_string', default="bioconda",
                        help='Change conda channel to search; default is bioconda.')
    parser.add_argument('--non-strict', dest='non_strict', action="store_true",
                        help='Autocorrection of typos activated. Lists more results but can be confusing. '
                             'For too many queries quay.io blocks the request and the results can be incomplete.')
    parser.add_argument('-j', '--json', dest='json',
                        action="store_true", help='Returns results as JSON.')
    parser.add_argument('-s', '--search', required=True, nargs='+',
                        help='The name of the tool(s) to search for.')

    # honour the argv parameter instead of always reading sys.argv
    args = parser.parse_args(argv)
    json_results = {dest: {} for dest in args.search_dest}
    versions = {}

    if len(args.search) > 1:  # get hash if multiple packages are searched
        # the combined package hash is searched as one more term
        args.search.append(get_package_hash(
            args.search, versions)['package_hash'])

    if 'conda' in args.search_dest:
        conda = CondaSearch(args.channel_string)
        json_results['conda'] = {
            item: conda.get_json(item) for item in args.search}

    if 'github' in args.search_dest:
        github_results = {}
        github_recipe_present = []
        github = GitHubSearch()

        for item in args.search:
            github_json = github.get_json(item)
            github_results[item] = github.process_json(github_json, item)
            if github.recipe_present(item):
                github_recipe_present.append(item)

        json_results['github'] = github_results
        json_results['github_recipe_present'] = {
            'recipes': github_recipe_present}

    if 'quay' in args.search_dest:
        quay = QuaySearch(args.organization_string)
        quay.build_index()
        json_results['quay'] = {
            item: quay.search_repository(item, args.non_strict) for item in args.search}

    if 'singularity' in args.search_dest:
        json_results['singularity'] = {
            item: singularity_search(item) for item in args.search}

    if args.json:
        # emit real JSON; printing the dict would give a Python repr
        print(json.dumps(json_results))
    else:
        readable_output(json_results, args.organization_string,
                        args.channel_string)
367 | |
368 | |
369 if __name__ == "__main__": | |
370 main() |