comparison env/lib/python3.9/site-packages/galaxy/tool_util/deps/mulled/mulled_search.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
#!/usr/bin/env python

import argparse
import json
import logging
import sys
import tempfile

import requests

from .mulled_list import get_singularity_containers
from .util import build_target, v2_image_name

try:
    from conda.cli.python_api import run_command
except ImportError:
    run_command = None  # type: ignore

try:
    from whoosh.fields import Schema
    from whoosh.fields import TEXT
    from whoosh.fields import STORED
    from whoosh.index import create_in
    from whoosh.qparser import QueryParser
except ImportError:
    Schema = TEXT = STORED = create_in = QueryParser = None  # type: ignore

QUAY_API_URL = 'https://quay.io/api/v1/repository'

class QuaySearch():
    """
    Tool to search within a quay.io organization for a given software name.
    """

    def __init__(self, organization):
        self.index = None
        self.organization = organization

    def build_index(self):
        """
        Create an index to quickly examine the repositories of a given quay.io organization.
        """
        # download all information about the repositories of the
        # organization given in self.organization

        parameters = {'public': 'true', 'namespace': self.organization}
        r = requests.get(QUAY_API_URL, headers={
            'Accept-encoding': 'gzip'}, params=parameters, timeout=12)
        tmp_dir = tempfile.mkdtemp()
        schema = Schema(title=TEXT(stored=True), content=STORED)
        self.index = create_in(tmp_dir, schema)

        json_decoder = json.JSONDecoder()
        decoded_request = json_decoder.decode(r.text)
        writer = self.index.writer()
        for repository in decoded_request['repositories']:
            writer.add_document(
                title=repository['name'], content=repository['description'])
        writer.commit()

    def search_repository(self, search_string, non_strict):
        """
        Search Docker containers on quay.io.
        Results are returned with all available versions of each
        matching repository.
        """
        # with statement closes searcher after usage.
        with self.index.searcher() as searcher:
            query = QueryParser(
                "title", self.index.schema).parse(search_string)
            results = searcher.search(query)
            if non_strict:
                # look for spelling errors and use suggestions as a search term too
                corrector = searcher.corrector("title")
                suggestions = corrector.suggest(search_string, limit=2)

                # get all repositories with suggested keywords
                for suggestion in suggestions:
                    search_string = "*%s*" % suggestion
                    query = QueryParser(
                        "title", self.index.schema).parse(search_string)
                    results_tmp = searcher.search(query)
                    results.extend(results_tmp)

            out = list()

            for result in results:
                title = result['title']
                for version in self.get_additional_repository_information(title):
                    out.append({'package': title, 'version': version, })

            return out

    def get_additional_repository_information(self, repository_string):
        """
        Download additional information from quay.io to get the 'tags'
        field, which holds the available version numbers.
        """
        url = f"{QUAY_API_URL}/{self.organization}/{repository_string}"
        r = requests.get(url, headers={'Accept-encoding': 'gzip'})

        json_decoder = json.JSONDecoder()
        decoded_request = json_decoder.decode(r.text)
        return decoded_request['tags']

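# Illustrative sketch of how the QuaySearch class above is typically driven (see
# main() below); "samtools" is only an example query and the returned values
# depend on what quay.io reports at runtime:
#
#     quay = QuaySearch("biocontainers")
#     quay.build_index()
#     hits = quay.search_repository("samtools", non_strict=False)
#     # hits -> [{'package': <repository name>, 'version': <tag>}, ...]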
class CondaSearch():
    """
    Tool to search a conda channel, e.g. bioconda.
    """

    def __init__(self, channel):
        self.channel = channel

    def get_json(self, search_string):
        """
        Take a search_string and return matching packages from the configured
        channel as a list of dicts (package, version, build).
        """
        if run_command is None:
            raise Exception("Invalid search destination. " + deps_error_message("conda"))
        raw_out, err, exit_code = run_command(
            'search', '-c',
            self.channel,
            search_string,
            use_exception_handler=True)
        if exit_code != 0:
            logging.info('Search failed with: %s' % err)
            return []
        return [{'package': n.split()[0], 'version': n.split()[1], 'build': n.split()[2]} for n in raw_out.split('\n')[2:-1]]

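# Illustrative sketch of the CondaSearch class above: it wraps
# `conda search -c <channel> <term>` through conda's Python API and flattens the
# tabular output; "bioconda" and "samtools" are example arguments only:
#
#     conda_search = CondaSearch("bioconda")
#     hits = conda_search.get_json("samtools")
#     # hits -> [{'package': ..., 'version': ..., 'build': ...}, ...]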
class GitHubSearch():
    """
    Tool to search the GitHub bioconda-recipes repo
    """

    def get_json(self, search_string):
        """
        Take a search_string and return results from the bioconda-recipes GitHub repository in JSON format
        """
        response = requests.get(
            "https://api.github.com/search/code?q=%s+in:path+repo:bioconda/bioconda-recipes+path:recipes" % search_string).json()
        return response

    def process_json(self, json, search_string):
        """
        Take the JSON returned by get_json and extract the required data (name and path of the top ten results)
        """
        json = json['items'][0:10]  # get top ten results

        results = []

        for result in json:
            results.append({'name': result['name'], 'path': result['path']})
        return results

    def recipe_present(self, search_string):
        """
        Check if a recipe exists in bioconda-recipes which matches search_string exactly
        """
        if requests.get("https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/%s" % search_string).status_code == 200:
            return True
        else:
            return False

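# Illustrative sketch of the GitHubSearch class above, combining the code search
# (get_json + process_json) with the exact recipe check (recipe_present);
# "samtools" is an example query only:
#
#     github = GitHubSearch()
#     raw = github.get_json("samtools")
#     top_hits = github.process_json(raw, "samtools")  # [{'name': ..., 'path': ...}, ...]
#     exact_match = github.recipe_present("samtools")  # True if recipes/samtools exists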
def get_package_hash(packages, versions):
    """
    Take packages and versions (if the latter are given) and return a hash for each.
    Also check quay.io to see if the container is already present.
    """
    hash_results = {}
    targets = []
    if versions:
        for p in packages:
            targets.append(build_target(p, version=versions[p]))
    else:  # if versions are not given only calculate the package hash
        for p in packages:
            targets.append(build_target(p))
    # make the hash from the processed targets
    package_hash = v2_image_name(targets)
    hash_results['package_hash'] = package_hash.split(':')[0]
    if versions:
        hash_results['version_hash'] = package_hash.split(':')[1]

    r = requests.get("https://quay.io/api/v1/repository/biocontainers/%s" % hash_results['package_hash'])
    if r.status_code == 200:
        hash_results['container_present'] = True
        if versions:  # now test if the version hash is listed in the repository tags
            # remove -0, -1, etc from end of the tag
            tags = [n[:-2] for n in r.json()['tags']]
            if hash_results['version_hash'] in tags:
                hash_results['container_present_with_version'] = True
            else:
                hash_results['container_present_with_version'] = False
    else:
        hash_results['container_present'] = False
    return hash_results

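# Illustrative sketch of get_package_hash above: for a multi-package query the
# mulled v2 name is roughly "mulled-v2-<package hash>:<version hash>", and only
# the package hash is computed when no versions are supplied; the package names
# and versions here are example inputs only:
#
#     get_package_hash(["pkg1", "pkg2"], {})
#     # -> {'package_hash': 'mulled-v2-...', 'container_present': True/False}
#     get_package_hash(["pkg1", "pkg2"], {"pkg1": "1.0", "pkg2": "2.0"})
#     # -> additionally 'version_hash' and, if the repository exists,
#     #    'container_present_with_version'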
def singularity_search(search_string):
    """
    Check if a Singularity container is present and return its name and version.
    """
    results = []

    containers = get_singularity_containers()

    for container in containers:
        if search_string in container:
            name = container.split(':')[0]
            version = container.split(':')[1]
            results.append({'package': name, 'version': version})

    return results

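# Illustrative sketch of singularity_search above: the list returned by
# get_singularity_containers() holds "<name>:<version>" entries, so a substring
# match yields name/version pairs; "samtools" is an example query only:
#
#     singularity_search("samtools")
#     # -> [{'package': ..., 'version': ...}, ...] for every matching container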
def readable_output(json, organization='biocontainers', channel='bioconda'):

    # if json is empty:
    if sum([len(json[destination][results]) for destination in json for results in json[destination]]) == 0:
        sys.stdout.write('No results found for that query.\n')
        return

    # return results for quay, conda and singularity together
    if sum([len(json[destination][results]) for destination in ['quay', 'conda', 'singularity', ] for results in json.get(destination, [])]) > 0:
        sys.stdout.write("The query returned the following result(s).\n")
        # put quay, conda etc results as lists in lines
        lines = [['LOCATION', 'NAME', 'VERSION', 'COMMAND\n']]
        for search_string, results in json.get('quay', {}).items():
            for result in results:
                lines.append(['quay', result['package'], result['version'], 'docker pull quay.io/%s/%s:%s\n' %
                              (organization, result['package'], result['version'])])  # NOT a real solution
        for search_string, results in json.get('conda', {}).items():
            for result in results:
                lines.append(['conda', result['package'], '{}--{}'.format(result['version'], result['build']),
                              'conda install -c {} {}={}={}\n'.format(channel, result['package'], result['version'], result['build'])])
        for search_string, results in json.get('singularity', {}).items():
            for result in results:
                lines.append(['singularity', result['package'], result['version'],
                              'wget https://depot.galaxyproject.org/singularity/{}:{}\n'.format(result['package'], result['version'])])

        col_width0, col_width1, col_width2 = (max(len(
            line[n]) for line in lines) + 2 for n in (0, 1, 2))  # determine max column widths for the output

        # create table
        for line in lines:
            sys.stdout.write("".join((line[0].ljust(col_width0), line[1].ljust(
                col_width1), line[2].ljust(col_width2), line[3])))  # output

    if json.get('github_recipe_present', False):
        sys.stdout.write('\n' if 'lines' in locals() else '')
        sys.stdout.write(
            'The following recipes were found in the bioconda-recipes repository which exactly matched one of the search terms:\n')
        lines = [['QUERY', 'LOCATION\n']]
        for recipe in json['github_recipe_present']['recipes']:
            lines.append(
                [recipe, "https://api.github.com/repos/bioconda/bioconda-recipes/contents/recipes/%s\n" % recipe])

        col_width0 = max(len(line[0]) for line in lines) + 2

        for line in lines:
            sys.stdout.write(
                "".join((line[0].ljust(col_width0), line[1])))  # output

    if sum([len(json['github'][results]) for results in json.get('github', [])]) > 0:
        sys.stdout.write('\n' if 'lines' in locals() else '')
        sys.stdout.write(
            "Other result(s) on the bioconda-recipes GitHub repository:\n")
        lines = [['QUERY', 'FILE', 'URL\n']]
        for search_string, results in json.get('github', {}).items():
            for result in results:
                lines.append([search_string, result['name'],
                              'https://github.com/bioconda/bioconda-recipes/tree/master/%s\n' % result['path']])

        # determine max column widths for the output
        col_width0, col_width1 = (
            max(len(line[n]) for line in lines) + 2 for n in (0, 1))

        for line in lines:
            sys.stdout.write("".join(
                (line[0].ljust(col_width0), line[1].ljust(col_width1), line[2])))  # output

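# Illustrative sketch of the json structure readable_output above expects, as
# assembled by main() below; which keys are present depends on the chosen search
# destinations, and the values shown here are placeholders only:
#
#     {
#         'quay': {'<term>': [{'package': ..., 'version': ...}, ...]},
#         'conda': {'<term>': [{'package': ..., 'version': ..., 'build': ...}, ...]},
#         'singularity': {'<term>': [{'package': ..., 'version': ...}, ...]},
#         'github': {'<term>': [{'name': ..., 'path': ...}, ...]},
#         'github_recipe_present': {'recipes': ['<term>', ...]},
#     }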
def deps_error_message(package):
    return "Required dependency [%s] is not installed. Run 'pip install galaxy-tool-util[mulled]'." % package

def main(argv=None):
    if Schema is None:
        sys.stdout.write(deps_error_message("Whoosh"))
        return

    destination_defaults = ['quay', 'singularity', 'github']
    if run_command is not None:
        destination_defaults.append('conda')

    parser = argparse.ArgumentParser(
        description='Searches quay.io, conda, GitHub bioconda-recipes and the Galaxy Singularity depot for a given tool or package.')
    parser.add_argument('-d', '--destination', dest='search_dest', nargs='+', default=destination_defaults,
                        help="Choose where to search. Options are 'conda', 'quay', 'singularity' and 'github'. If no option is given, all will be searched.")
    parser.add_argument('-o', '--organization', dest='organization_string', default="biocontainers",
                        help='Change quay organization to search; default is biocontainers.')
    parser.add_argument('-c', '--channel', dest='channel_string', default="bioconda",
                        help='Change conda channel to search; default is bioconda.')
    parser.add_argument('--non-strict', dest='non_strict', action="store_true",
                        help='Activate autocorrection of typos. Lists more results but can be confusing.\
                        If too many queries are made, quay.io blocks the request and the results can be incomplete.')
    parser.add_argument('-j', '--json', dest='json',
                        action="store_true", help='Returns results as JSON.')
    parser.add_argument('-s', '--search', required=True, nargs='+',
                        help='The name of the tool(s) to search for.')

    args = parser.parse_args()
    json_results = {dest: {} for dest in args.search_dest}
    versions = {}

    if len(args.search) > 1:  # get hash if multiple packages are searched
        args.search.append(get_package_hash(
            args.search, versions)['package_hash'])

    if 'conda' in args.search_dest:
        conda_results = {}
        conda = CondaSearch(args.channel_string)

        for item in args.search:
            conda_results[item] = conda.get_json(item)
        json_results['conda'] = conda_results

    if 'github' in args.search_dest:
        github_results = {}
        github_recipe_present = []
        github = GitHubSearch()

        for item in args.search:
            github_json = github.get_json(item)
            github_results[item] = github.process_json(github_json, item)
            if github.recipe_present(item):
                github_recipe_present.append(item)

        json_results['github'] = github_results
        json_results['github_recipe_present'] = {
            'recipes': github_recipe_present}

    if 'quay' in args.search_dest:
        quay_results = {}
        quay = QuaySearch(args.organization_string)
        quay.build_index()

        for item in args.search:
            quay_results[item] = quay.search_repository(item, args.non_strict)

        json_results['quay'] = quay_results

    if 'singularity' in args.search_dest:
        singularity_results = {}
        for item in args.search:
            singularity_results[item] = singularity_search(item)
        json_results['singularity'] = singularity_results

    if args.json:
        print(json.dumps(json_results))
    else:
        readable_output(json_results, args.organization_string,
                        args.channel_string)


if __name__ == "__main__":
    main()
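# Illustrative command-line sketch; the search terms are examples only, and the
# `mulled-search` entry point name is assumed from the galaxy-tool-util
# packaging (invoking the module directly also works):
#
#     python -m galaxy.tool_util.deps.mulled.mulled_search -s samtools -d quay singularity
#     mulled-search -s samtools bcftools --json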