# HG changeset patch # User gga # Date 1505123245 14400 # Node ID c6d7f19953a688b20844bdc0c4ef6841c7e9f889 planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit f745b23c84a615bf434d717c8c0e553a012f0268 diff -r 000000000000 -r c6d7f19953a6 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,39 @@ +Galaxy-apollo +============= + +Galaxy tools to interface with Apollo. The webapollo.py file is also +`separately +available `__ +as a pip-installable package. + +Environ + +The following environment variables must be set: + ++--------------------------------+-----------------------------------------------------------+ +| ENV | Use | ++================================+===========================================================+ +| ``$GALAXY_WEBAPOLLO_URL`` | The URL at which Apollo is accessible, internal to Galaxy | +| | and where the tools run. Must be absolute, with FQDN and | +| | protocol. | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_WEBAPOLLO_USER`` | The admin user which Galaxy should use to talk to Apollo. | +| | | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_WEBAPOLLO_PASSWORD`` | The password for the admin user. | +| | | +| | | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_WEBAPOLLO_EXT_URL`` | The external URL at which Apollo is accessible to end | +| | users. May be relative or absolute. | +| | | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_SHARED_DIR`` | Directory shared between Galaxy and Apollo, used to | +| | exchange JBrowse instances. 
| ++--------------------------------+-----------------------------------------------------------+ + +License +------- + +All python scripts, wrappers, and the webapollo.py are licensed under +MIT license. diff -r 000000000000 -r c6d7f19953a6 create_account.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_account.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,44 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import random +import time + +from builtins import range, str + +from webapollo import WAAuth, WebApolloInstance + + +def pwgen(length): + chars = list('qwrtpsdfghjklzxcvbnm') + return ''.join(random.choice(chars) for _ in range(length)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an account via web services') + WAAuth(parser) + + parser.add_argument('email', help='User Email') + parser.add_argument('--first', help='First Name', default='Jane') + parser.add_argument('--last', help='Last Name', default='Aggie') + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + + password = pwgen(12) + time.sleep(1) + users = wa.users.loadUsers() + user = [u for u in users + if u.username == args.email] + + if len(user) == 1: + # Update name, regen password if the user ran it again + userObj = user[0] + returnData = wa.users.updateUser(userObj, args.email, args.first, args.last, password) + print('Updated User\nUsername: %s\nPassword: %s' % (args.email, password)) + else: + returnData = wa.users.createUser(args.email, args.first, args.last, password, role='user') + print('Created User\nUsername: %s\nPassword: %s' % (args.email, password)) + + print("Return data: " + str(returnData)) diff -r 000000000000 -r c6d7f19953a6 create_features_from_gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_features_from_gff3.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,186 @@ +#!/usr/bin/env python +import argparse +import 
logging +import sys +import time + +from builtins import str + +from BCBio import GFF + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + WAAuth(parser) + parser.add_argument('email', help='User Email') + parser.add_argument('--source', help='URL where the input dataset can be found.') + OrgOrGuess(parser) + + parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file') + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + # Get organism + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # TODO: Check user perms on org. + org = wa.organisms.findOrganismByCn(org_cn) + + bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID'] + + sys.stdout.write('# ') + sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages'])) + sys.stdout.write('\n') + # print(wa.annotations.getFeatures()) + for rec in GFF.parse(args.gff3): + wa.annotations.setSequence(rec.id, org['id']) + for feature in rec.features: + # We can only handle genes right now + if feature.type not in ('gene', 'terminator'): + continue + # Convert the feature into a presentation that Apollo will accept + featureData = featuresToFeatureSchema([feature]) + if 'children' in featureData[0] and any([child['type']['name'] == 'tRNA' for child in featureData[0]['children']]): + # We're experiencing a (transient?) problem where gene_001 to + # gene_025 will be rejected. Thus, hardcode to a known working + # gene name and update later. 
+ + featureData[0]['name'] = 'tRNA_000' + tRNA_sf = [child for child in feature.sub_features if child.type == 'tRNA'][0] + tRNA_type = 'tRNA-' + tRNA_sf.qualifiers.get('Codon', ["Unk"])[0] + + if 'Name' in feature.qualifiers: + if feature.qualifiers['Name'][0].startswith('tRNA-'): + tRNA_type = feature.qualifiers['Name'][0] + + newfeature = wa.annotations.addFeature(featureData, trustme=True) + + def func0(): + wa.annotations.setName( + newfeature['features'][0]['uniquename'], + tRNA_type, + ) + retry(func0) + + if args.source: + gene_id = newfeature['features'][0]['parent_id'] + + def setSource(): + wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]}) + retry(setSource) + + sys.stdout.write('\t'.join([ + feature.id, + newfeature['features'][0]['uniquename'], + 'success', + ])) + elif featureData[0]['type']['name'] == 'terminator': + # We're experiencing a (transient?) problem where gene_001 to + # gene_025 will be rejected. Thus, hardcode to a known working + # gene name and update later. + featureData[0]['name'] = 'terminator_000' + newfeature = wa.annotations.addFeature(featureData, trustme=True) + + def func0(): + wa.annotations.setName( + newfeature['features'][0]['uniquename'], + 'terminator' + ) + + retry(func0) + + if args.source: + gene_id = newfeature['features'][0]['parent_id'] + + def setSource(): + wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]}) + retry(setSource) + + sys.stdout.write('\t'.join([ + feature.id, + newfeature['features'][0]['uniquename'], + 'success', + ])) + else: + try: + # We're experiencing a (transient?) problem where gene_001 to + # gene_025 will be rejected. Thus, hardcode to a known working + # gene name and update later. 
+ featureData[0]['name'] = 'gene_000' + # Extract CDS feature from the feature data, this will be used + # to set the CDS location correctly (apollo currently screwing + # this up (2.0.6)) + CDS = featureData[0]['children'][0]['children'] + CDS = [x for x in CDS if x['type']['name'] == 'CDS'][0]['location'] + # Create the new feature + newfeature = wa.annotations.addFeature(featureData, trustme=True) + # Extract the UUIDs that apollo returns to us + mrna_id = newfeature['features'][0]['uniquename'] + gene_id = newfeature['features'][0]['parent_id'] + # Sleep to give it time to actually persist the feature. Apollo + # is terrible about writing + immediately reading back written + # data. + time.sleep(1) + # Correct the translation start, but with strand specific log + if CDS['strand'] == 1: + wa.annotations.setTranslationStart(mrna_id, min(CDS['fmin'], CDS['fmax'])) + else: + wa.annotations.setTranslationStart(mrna_id, max(CDS['fmin'], CDS['fmax']) - 1) + + # Finally we set the name, this should be correct. 
+ time.sleep(0.5) + wa.annotations.setName(mrna_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0]) + time.sleep(0.5) + + def func(): + wa.annotations.setName(gene_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0]) + retry(func) + + if args.source: + gene_id = newfeature['features'][0]['parent_id'] + + def setSource(): + wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]}) + retry(setSource) + extra_attr = {} + for (key, values) in feature.qualifiers.items(): + if key in bad_quals: + continue + + if key == 'Note': + def func2(): + wa.annotations.addComments(gene_id, values) + retry(func2) + else: + extra_attr[key] = values + + def func3(): + wa.annotations.addAttributes(gene_id, extra_attr) + retry(func3) + + sys.stdout.write('\t'.join([ + feature.id, + gene_id, + 'success', + ])) + except Exception as e: + msg = str(e) + if '\n' in msg: + msg = msg[0:msg.index('\n')] + sys.stdout.write('\t'.join([ + feature.id, + '', + 'ERROR', + msg + ])) + sys.stdout.write('\n') + sys.stdout.flush() diff -r 000000000000 -r c6d7f19953a6 create_or_update_organism.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_or_update_organism.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,102 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json +import logging +import shutil +import sys +import time + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Create or update an organism in an Apollo instance') + WAAuth(parser) + + parser.add_argument('jbrowse', help='JBrowse Data Directory') + parser.add_argument('email', help='User Email') + OrgOrGuess(parser) + parser.add_argument('--genus', help='Organism Genus') + parser.add_argument('--species', help='Organism Species') 
+ parser.add_argument('--public', action='store_true', help='Make organism public') + parser.add_argument('--group', help='Give access to a user group') + parser.add_argument('--remove_old_directory', action='store_true', help='Remove old directory') + + args = parser.parse_args() + wa = WebApolloInstance(args.apollo, args.username, args.password) + + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + log.info("Determining if add or update required") + try: + org = wa.organisms.findOrganismByCn(org_cn) + except Exception: + org = None + + if org: + has_perms = False + old_directory = org.directory + for user_owned_organism in gx_user.organismPermissions: + if 'WRITE' in user_owned_organism['permissions']: + has_perms = True + break + + if not has_perms: + print("Naming Conflict. You do not have permissions to access this organism. Either request permission from the owner, or choose a different name for your organism.") + sys.exit(2) + + log.info("\tUpdating Organism") + data = wa.organisms.updateOrganismInfo( + org['id'], + org_cn, + args.jbrowse, + # mandatory + genus=args.genus, + species=args.species, + public=args.public + ) + time.sleep(2) + if(args.remove_old_directory): + shutil.rmtree(old_directory) + + data = [wa.organisms.findOrganismById(org['id'])] + + else: + # New organism + log.info("\tAdding Organism") + data = wa.organisms.addOrganism( + org_cn, + args.jbrowse, + genus=args.genus, + species=args.species, + public=args.public + ) + + # Must sleep before we're ready to handle + time.sleep(2) + log.info("Updating permissions for %s on %s", gx_user, org_cn) + wa.users.updateOrganismPermission( + gx_user, org_cn, + write=True, + export=True, + read=True, + ) + + # Group access + if args.group: + group = wa.groups.loadGroupByName(name=args.group) + res = wa.groups.updateOrganismPermission(group, org_cn, + administrate=False, 
write=True, read=True, + export=True) + + data = [o for o in data if o['commonName'] == org_cn] + print(json.dumps(data, indent=2)) diff -r 000000000000 -r c6d7f19953a6 delete_features.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delete_features.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,66 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import logging +import random + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance, retry +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to delete all features from an organism') + WAAuth(parser) + parser.add_argument('email', help='User Email') + parser.add_argument('--type', help='Feature type filter') + OrgOrGuess(parser) + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + # Get organism + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # TODO: Check user perms on org. + org = wa.organisms.findOrganismByCn(org_cn) + + sequences = wa.organisms.getSequencesForOrganism(org['id']) + for sequence in sequences['sequences']: + log.info("Processing %s %s", org['commonName'], sequence['name']) + # Call setSequence to tell apollo which organism we're working with + wa.annotations.setSequence(sequence['name'], org['id']) + # Then get a list of features. 
+ features = wa.annotations.getFeatures() + # For each feature in the features + for feature in sorted(features['features'], key=lambda x: random.random()): + if args.type: + if args.type == 'tRNA': + if feature['type']['name'] != 'tRNA': + continue + + elif args.type == 'terminator': + if feature['type']['name'] != 'terminator': + continue + + elif args.type == 'mRNA': + if feature['type']['name'] != 'mRNA': + continue + + else: + raise Exception("Unknown type") + + # We see that deleteFeatures wants a uniqueName, and so we pass + # is the uniquename field in the feature. + def fn(): + wa.annotations.deleteFeatures([feature['uniquename']]) + print('Deleted %s [type=%s]' % (feature['uniquename'], feature['type']['name'])) + + if not retry(fn, limit=3): + print('Error %s' % feature['uniquename']) diff -r 000000000000 -r c6d7f19953a6 delete_organism.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delete_organism.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,40 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import logging + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to completely delete an organism') + WAAuth(parser) + parser.add_argument('email', help='User Email') + OrgOrGuess(parser) + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + # Get organism + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # TODO: Check user perms on org. + org = wa.organisms.findOrganismByCn(org_cn) + + # Call setSequence to tell apollo which organism we're working with + wa.annotations.setSequence(org['commonName'], org['id']) + # Then get a list of features. 
+ features = wa.annotations.getFeatures() + # For each feature in the features + for feature in features['features']: + # We see that deleteFeatures wants a uniqueName, and so we pass + # is the uniquename field in the feature. + print(wa.annotations.deleteFeatures([feature['uniquename']])) diff -r 000000000000 -r c6d7f19953a6 export.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/export.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,88 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json +import sys + +from BCBio import GFF + +from Bio import SeqIO + +from future import standard_library + +from webapollo import CnOrGuess, GuessCn, WAAuth, WebApolloInstance + +standard_library.install_aliases() +try: + import StringIO as io +except ImportError: + import io + + +def export(org_cn, seqs): + org_data = wa.organisms.findOrganismByCn(org_cn) + + data = io.StringIO() + + kwargs = dict( + exportType='GFF3', + seqType='genomic', + exportGff3Fasta=True, + output="text", + exportFormat="text", + organism=org_cn, + ) + + if len(seqs) > 0: + data.write(wa.io.write( + exportAllSequences=False, + sequences=seqs, + **kwargs + ).encode('utf-8')) + else: + data.write(wa.io.write( + exportAllSequences=True, + sequences=[], + **kwargs + ).encode('utf-8')) + + # Seek back to start + data.seek(0) + + records = list(GFF.parse(data)) + if len(records) == 0: + print("Could not find any sequences or annotations for this organism + reference sequence") + sys.exit(2) + else: + for record in records: + record.annotations = {} + record.features = sorted(record.features, key=lambda x: x.location.start) + if args.gff: + GFF.write([record], args.gff) + record.description = "" + if args.fasta: + SeqIO.write([record], args.fasta, 'fasta') + + return org_data + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + WAAuth(parser) + CnOrGuess(parser) + 
parser.add_argument('--gff', type=argparse.FileType('w')) + parser.add_argument('--fasta', type=argparse.FileType('w')) + parser.add_argument('--json', type=argparse.FileType('w')) + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + + org_cn_list, seqs = GuessCn(args, wa) + + org_data = [] + for org_cn in org_cn_list: + indiv_org_data = export(org_cn, seqs) + org_data.append(indiv_org_data) + args.json.write(json.dumps(org_data, indent=2)) diff -r 000000000000 -r c6d7f19953a6 fetch_organism_jbrowse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fetch_organism_jbrowse.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,113 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import filecmp +import logging +import os +import subprocess +import sys +import time + +from webapollo import GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +def are_dir_trees_equal(dir1, dir2): + """ + Compare two directories recursively. Files in each directory are + assumed to be equal if their names and contents are equal. + + @param dir1: First directory path + @param dir2: Second directory path + + @return: True if the directory trees are the same and + there were no errors while accessing the directories or files, + False otherwise. 
+ + # http://stackoverflow.com/questions/4187564/recursive-dircmp-compare-two-directories-to-ensure-they-have-the-same-files-and/6681395#6681395 + """ + + dirs_cmp = filecmp.dircmp(dir1, dir2) + if len(dirs_cmp.left_only) > 0 or len(dirs_cmp.right_only) > 0 or \ + len(dirs_cmp.funny_files) > 0: + print(('LEFT', dirs_cmp.left_only)) + print(('RIGHT', dirs_cmp.right_only)) + print(('FUNNY', dirs_cmp.funny_files)) + return False + (_, mismatch, errors) = filecmp.cmpfiles( + dir1, dir2, dirs_cmp.common_files, shallow=False) + if len(mismatch) > 0 or len(errors) > 0: + print(mismatch) + print(errors) + return False + for common_dir in dirs_cmp.common_dirs: + new_dir1 = os.path.join(dir1, common_dir) + new_dir2 = os.path.join(dir2, common_dir) + if not are_dir_trees_equal(new_dir1, new_dir2): + return False + return True + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + WAAuth(parser) + OrgOrGuess(parser) + parser.add_argument('target_dir', help='Target directory') + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + org = wa.organisms.findOrganismByCn(org_cn) + + if not os.path.exists(args.target_dir): + os.makedirs(args.target_dir) + + if not os.path.exists(os.path.join(org['directory'], 'seq')): + sys.stderr.write("Missing seq directory BEFORE copy") + sys.exit(1) + + cmd = [ + 'rsync', '-avr', + org['directory'].rstrip('/') + '/', + os.path.join(args.target_dir, 'data', '') + ] + # We run this OBSESSIVELY because my org had a hiccup where the origin + # (silent) cp -R failed at one point. This caused MANY HEADACHES. + # + # Our response is to run this 3 times (in case the issue is temporary), + # with delays in between. And ensure that we have the correct number of + # files / folders before and after. 
+ sys.stderr.write(' '.join(cmd)) + sys.stderr.write('\n') + sys.stderr.write(subprocess.check_output(cmd)) + if not are_dir_trees_equal( + os.path.join(org['directory'].rstrip('/')), + os.path.join(args.target_dir, 'data') + ): + # Not good + time.sleep(5) + sys.stderr.write('\n') + sys.stderr.write(' '.join(cmd)) + sys.stderr.write('\n') + sys.stderr.write(subprocess.check_output(cmd)) + if not are_dir_trees_equal( + os.path.join(org['directory'].rstrip('/'), 'data'), + os.path.join(args.target_dir, 'data') + ): + time.sleep(5) + sys.stderr.write('\n') + sys.stderr.write(' '.join(cmd)) + sys.stderr.write('\n') + sys.stderr.write(subprocess.check_output(cmd)) + if not are_dir_trees_equal( + os.path.join(org['directory'].rstrip('/'), 'data'), + os.path.join(args.target_dir, 'data') + ): + sys.stderr.write('FAILED THREE TIMES TO COPY. SOMETHING IS WRONG WRONG WRONG.') + sys.exit(2) diff -r 000000000000 -r c6d7f19953a6 fetch_organism_jbrowse.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fetch_organism_jbrowse.xml Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,84 @@ + + + for an organism, from Apollo + + macros.xml + + + + %P\n' | sort >> $jbrowse && +echo '' >> $jbrowse + +]]> + + + + + + +

JBrowse Data Directory

+

+ Hi! This is not a full JBrowse instance. JBrowse v0.4(+?) + started shipping with the ability to produce just the + "data" directory from a JBrowse instance, rather than a + complete, standalone instance. This was intended to be used + with the in-development Apollo integration, but may have other + uses as well. +

+

+ This is not usable on its own. The output dataset may be + used with Apollo, or may be passed through the "JBrowse - + Convert to Standalone" tool in Galaxy to "upgrade" to a full + JBrowse instance. +

+

+ The following list is provided for your convenience / debugging. +

+

Contained Files

+
    + ]]> + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r c6d7f19953a6 json2iframe.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/json2iframe.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,32 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + parser.add_argument('apollo', help='Complete Apollo URL') + parser.add_argument('json', type=argparse.FileType("r"), help='JSON Data') + parser.add_argument('external_apollo_url') + + args = parser.parse_args() + + # https://fqdn/apollo/annotator/loadLink?loc=NC_005880:0..148317&organism=326&tracks= + data = json.load(args.json) + + # This is base64 encoded to get past the toolshed's filters. + HTML_TPL = """ + + + Embedded Apollo Access + + + + + + + """ + + print(HTML_TPL.format(base_url=args.external_apollo_url, chrom="", orgId=data[0]['id'])) diff -r 000000000000 -r c6d7f19953a6 list_organisms.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list_organisms.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,22 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json + +from webapollo import AssertUser, WAAuth, WebApolloInstance, accessible_organisms + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='List all organisms available in an Apollo instance') + WAAuth(parser) + parser.add_argument('email', help='User Email') + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + all_orgs = wa.organisms.findAllOrganisms() + + orgs = accessible_organisms(gx_user, all_orgs) + + print(json.dumps(orgs, indent=2)) diff -r 000000000000 -r c6d7f19953a6 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Sep 11 05:47:25 2017 -0400 @@ 
-0,0 +1,129 @@ + + + + + python + biopython + bcbiogff + requests + future + + + + \$GALAXY_SHARED_DIR + +"\$GALAXY_WEBAPOLLO_EXT_URL" + + +"\$GALAXY_WEBAPOLLO_URL" + + +"\$GALAXY_WEBAPOLLO_URL" +"\$GALAXY_WEBAPOLLO_USER" +"\$GALAXY_WEBAPOLLO_PASSWORD" + + + + + + + 0: + --seq_raw + #for $item in $cn_source.refseqs: + "${item.refseq}" + #end for + #end if +#end if +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#if $reference_genome.reference_genome_source == 'history': + ln -s $reference_genome.genome_fasta genomeref.fa; +#end if + + +#if $reference_genome.reference_genome_source == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#elif $reference_genome.reference_genome_source == 'history': + genomeref.fa +#end if + + +`_ +]]> + + diff -r 000000000000 -r c6d7f19953a6 test-data/bad-model.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bad-model.gff3 Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,9 @@ +##gff-version 3 +##sequence-region Maroon_JMcDermott 1 144762 +Maroon_JMcDermott . gene 14488 14805 . + . Name=gene_26;date_creation=2016-02-17;owner=jmc_texas@tamu.edu;ID=707c88b7-36d1-44e3-93e6-d1d4f1219d57;date_last_modified=2016-02-17 +Maroon_JMcDermott . mRNA 14488 14805 . + . Name=gene_26-00001;date_creation=2016-02-17;Parent=707c88b7-36d1-44e3-93e6-d1d4f1219d57;owner=jmc_texas@tamu.edu;ID=8760695d-b88c-41c0-857b-540e6db81fe8;date_last_modified=2016-02-17 +Maroon_JMcDermott . CDS 14707 14805 . + 0 Name=94abf796-4c8d-45f4-916b-4d279616565e-CDS;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=94abf796-4c8d-45f4-916b-4d279616565e +Maroon_JMcDermott . exon 14497 14805 . + . Name=d2ebd8d0-6558-4674-a38f-346f88256340-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=d2ebd8d0-6558-4674-a38f-346f88256340 +Maroon_JMcDermott . exon 14488 14491 . + . 
Name=2e4119f9-3220-4502-8ddd-4821c872e0d6-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=2e4119f9-3220-4502-8ddd-4821c872e0d6 +Maroon_JMcDermott . non_canonical_five_prime_splice_site 14494 14494 . + . Name=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_five_prime_splice_site-14493;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_five_prime_splice_site-14493 +Maroon_JMcDermott . non_canonical_three_prime_splice_site 14497 14497 . + . Name=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_three_prive_splice_site-14496;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_three_prive_splice_site-14496 diff -r 000000000000 -r c6d7f19953a6 test-data/fake.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fake.json Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,1 @@ +[{"id": "fake"}] diff -r 000000000000 -r c6d7f19953a6 test-data/good-model.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/good-model.gff3 Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,7 @@ +##gff-version 3 +##sequence-region Maroon_JMcDermott 1 14805 +Maroon_JMcDermott feature gene 14488 14805 . + . ID=707c88b7-36d1-44e3-93e6-d1d4f1219d57;Name=gene_26;date_creation=2016-02-17;date_last_modified=2016-02-17;owner=jmc_texas%40tamu.edu +Maroon_JMcDermott feature mRNA 14488 14805 . + . ID=8760695d-b88c-41c0-857b-540e6db81fe8;Name=gene_26-00001;Parent=707c88b7-36d1-44e3-93e6-d1d4f1219d57;date_creation=2016-02-17;date_last_modified=2016-02-17;owner=jmc_texas%40tamu.edu +Maroon_JMcDermott feature CDS 14707 14805 . + 0 ID=94abf796-4c8d-45f4-916b-4d279616565e;Name=94abf796-4c8d-45f4-916b-4d279616565e-CDS;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 +Maroon_JMcDermott feature exon 14497 14805 . + . ID=d2ebd8d0-6558-4674-a38f-346f88256340;Name=d2ebd8d0-6558-4674-a38f-346f88256340-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 +Maroon_JMcDermott feature Shine_Dalgarno_sequence 14488 14491 . 
+ . ID=2e4119f9-3220-4502-8ddd-4821c872e0d6;Name=2e4119f9-3220-4502-8ddd-4821c872e0d6-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 diff -r 000000000000 -r c6d7f19953a6 webapollo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/webapollo.py Mon Sep 11 05:47:25 2017 -0400 @@ -0,0 +1,1718 @@ +from __future__ import print_function + +import argparse +import collections +import json +import logging +import os +import time + +from abc import abstractmethod +from builtins import next +from builtins import object +from builtins import str + +from BCBio import GFF + +from Bio import SeqIO + +from future import standard_library + +import requests + + +standard_library.install_aliases() +try: + import StringIO as io +except BaseException: + import io +logging.getLogger("requests").setLevel(logging.CRITICAL) +log = logging.getLogger() + + +############################################# +# BEGIN IMPORT OF CACHING LIBRARY # +############################################# +# This code is licensed under the MIT # +# License and is a copy of code publicly # +# available in rev. 
# e27332bc82f4e327aedaec17c9b656ae719322ed  #
# of https://github.com/tkem/cachetools/    #
#############################################

# Local change to the vendored copy: the ABC aliases in ``collections`` (such
# as ``collections.MutableMapping``) were removed in Python 3.10, so take the
# base class from ``collections.abc`` and only fall back to the old location
# on Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping


class DefaultMapping(_MutableMapping):
    """Mutable mapping base whose ``get``/``pop``/``setdefault`` are written
    in terms of ``in``, ``[]`` and ``del`` so subclasses only override those.
    """

    __slots__ = ()

    @abstractmethod
    def __contains__(self, key):  # pragma: nocover
        return False

    @abstractmethod
    def __getitem__(self, key):  # pragma: nocover
        if hasattr(self.__class__, '__missing__'):
            return self.__class__.__missing__(self, key)
        else:
            raise KeyError(key)

    def get(self, key, default=None):
        if key in self:
            return self[key]
        else:
            return default

    # Private sentinel so that ``None`` stays usable as an explicit default.
    __marker = object()

    def pop(self, key, default=__marker):
        if key in self:
            value = self[key]
            del self[key]
        elif default is self.__marker:
            raise KeyError(key)
        else:
            value = default
        return value

    def setdefault(self, key, default=None):
        if key in self:
            value = self[key]
        else:
            self[key] = value = default
        return value


DefaultMapping.register(dict)


class _DefaultSize(object):
    """Stand-in size table used when no ``getsizeof`` is given: every item
    has size 1."""

    def __getitem__(self, _):
        return 1

    def __setitem__(self, _, value):
        assert value == 1

    def pop(self, _):
        return 1


class Cache(DefaultMapping):
    """Mutable mapping to serve as a simple cache or cache base class."""

    __size = _DefaultSize()

    def __init__(self, maxsize, missing=None, getsizeof=None):
        if missing:
            self.__missing = missing
        if getsizeof:
            self.__getsizeof = getsizeof
            # A real per-key size table is only needed with a custom sizer.
            self.__size = dict()
        self.__data = dict()
        self.__currsize = 0
        self.__maxsize = maxsize

    def __repr__(self):
        return '%s(%r, maxsize=%r, currsize=%r)' % (
            self.__class__.__name__,
            list(self.__data.items()),
            self.__maxsize,
            self.__currsize,
        )

    def __getitem__(self, key):
        try:
            return self.__data[key]
        except KeyError:
            return self.__missing__(key)

    def __setitem__(self, key, value):
        maxsize = self.__maxsize
        size = self.getsizeof(value)
        if size > maxsize:
            raise ValueError('value too large')
        if key not in self.__data or self.__size[key] < size:
            # Evict (via the subclass' popitem policy) until the value fits.
            while self.__currsize + size > maxsize:
                self.popitem()
        if key in self.__data:
            diffsize = size - self.__size[key]
        else:
            diffsize = size
        self.__data[key] = value
        self.__size[key] = size
        self.__currsize += diffsize

    def __delitem__(self, key):
        size = self.__size.pop(key)
        del self.__data[key]
        self.__currsize -= size

    def __contains__(self, key):
        return key in self.__data

    def __missing__(self, key):
        value = self.__missing(key)
        try:
            self.__setitem__(key, value)
        except ValueError:
            pass  # value too large
        return value

    def __iter__(self):
        return iter(self.__data)

    def __len__(self):
        return len(self.__data)

    @staticmethod
    def __getsizeof(value):
        return 1

    @staticmethod
    def __missing(key):
        raise KeyError(key)

    @property
    def maxsize(self):
        """The maximum size of the cache."""
        return self.__maxsize

    @property
    def currsize(self):
        """The current size of the cache."""
        return self.__currsize

    def getsizeof(self, value):
        """Return the size of a cache element's value."""
        return self.__getsizeof(value)


class _Link(object):
    """Node of the circular doubly linked list TTLCache keeps in insertion
    (and therefore expiry) order."""

    __slots__ = ('key', 'expire', 'next', 'prev')

    def __init__(self, key=None, expire=None):
        self.key = key
        self.expire = expire

    def __reduce__(self):
        return _Link, (self.key, self.expire)

    def unlink(self):
        next = self.next
        prev = self.prev
        prev.next = next
        next.prev = prev


class _Timer(object):
    """Wrap a timer callable so that, used as a context manager, it reports
    one frozen time value for the whole (possibly nested) ``with`` block."""

    def __init__(self, timer):
        self.__timer = timer
        self.__nesting = 0

    def __call__(self):
        if self.__nesting == 0:
            return self.__timer()
        else:
            return self.__time

    def __enter__(self):
        if self.__nesting == 0:
            self.__time = time = self.__timer()
        else:
            time = self.__time
        self.__nesting += 1
        return time

    def __exit__(self, *exc):
        self.__nesting -= 1

    def __reduce__(self):
        return _Timer, (self.__timer,)

    def __getattr__(self, name):
        return getattr(self.__timer, name)


class TTLCache(Cache):
    """LRU Cache implementation with per-item time-to-live (TTL) value."""

    def __init__(self, maxsize, ttl, timer=time.time, missing=None,
                 getsizeof=None):
        Cache.__init__(self, maxsize, missing, getsizeof)
        # Sentinel root of the circular expiry list.
        self.__root = root = _Link()
        root.prev = root.next = root
        # key -> _Link, ordered by recency of use (oldest first).
        self.__links = collections.OrderedDict()
        self.__timer = _Timer(timer)
        self.__ttl = ttl

    def __contains__(self, key):
        try:
            link = self.__links[key]  # no reordering
        except KeyError:
            return False
        else:
            return not (link.expire < self.__timer())

    def __getitem__(self, key, cache_getitem=Cache.__getitem__):
        try:
            link = self.__getlink(key)
        except KeyError:
            expired = False
        else:
            expired = link.expire < self.__timer()
        if expired:
            return self.__missing__(key)
        else:
            return cache_getitem(self, key)

    def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
        with self.__timer as time:
            self.expire(time)
            cache_setitem(self, key, value)
        try:
            link = self.__getlink(key)
        except KeyError:
            self.__links[key] = link = _Link(key)
        else:
            link.unlink()
        # (Re)insert at the tail of the expiry list with a fresh deadline.
        link.expire = time + self.__ttl
        link.next = root = self.__root
        link.prev = prev = root.prev
        prev.next = root.prev = link

    def __delitem__(self, key, cache_delitem=Cache.__delitem__):
        cache_delitem(self, key)
        link = self.__links.pop(key)
        link.unlink()
        if link.expire < self.__timer():
            raise KeyError(key)

    def __iter__(self):
        root = self.__root
        curr = root.next
        while curr is not root:
            # "freeze" time for iterator access
            with self.__timer as time:
                if not (curr.expire < time):
                    yield curr.key
            curr = curr.next

    def __len__(self):
        root = self.__root
        curr = root.next
        time = self.__timer()
        count = len(self.__links)
        while curr is not root and curr.expire < time:
            count -= 1
            curr = curr.next
        return count

    def __setstate__(self, state):
        self.__dict__.update(state)
        root = self.__root
        root.prev = root.next = root
        for link in sorted(self.__links.values(), key=lambda obj: obj.expire):
            link.next = root
            link.prev = prev = root.prev
            prev.next = root.prev = link
        self.expire(self.__timer())

    def __repr__(self, cache_repr=Cache.__repr__):
        with self.__timer as time:
            self.expire(time)
            return cache_repr(self)

    @property
    def currsize(self):
        with self.__timer as time:
            self.expire(time)
            return super(TTLCache, self).currsize

    @property
    def timer(self):
        """The timer function used by the cache."""
        return self.__timer

    @property
    def ttl(self):
        """The time-to-live value of the cache's items."""
        return self.__ttl

    def expire(self, time=None):
        """Remove expired items from the cache."""
        if time is None:
            time = self.__timer()
        root = self.__root
        curr = root.next
        links = self.__links
        cache_delitem = Cache.__delitem__
        while curr is not root and curr.expire < time:
            cache_delitem(self, curr.key)
            del links[curr.key]
            next = curr.next
            curr.unlink()
            curr = next

    def clear(self):
        with self.__timer as time:
            self.expire(time)
            Cache.clear(self)

    def get(self, *args, **kwargs):
        with self.__timer:
            return Cache.get(self, *args, **kwargs)

    def pop(self, *args, **kwargs):
        with self.__timer:
            return Cache.pop(self, *args, **kwargs)

    def setdefault(self, *args, **kwargs):
        with self.__timer:
            return Cache.setdefault(self, *args, **kwargs)

    def popitem(self):
        """Remove and return the `(key, value)` pair least recently used that
        has not already expired.

        """
        with self.__timer as time:
            self.expire(time)
            try:
                key = next(iter(self.__links))
            except StopIteration:
                raise KeyError('%s is empty' % self.__class__.__name__)
            else:
                return (key, self.pop(key))

    # Fetch a link and mark its key as most recently used.
    if hasattr(collections.OrderedDict, 'move_to_end'):
        def __getlink(self, key):
            value = self.__links[key]
            self.__links.move_to_end(key)
            return value
    else:
        def __getlink(self, key):
            value = self.__links.pop(key)
            self.__links[key] = value
            return value


#############################################
#  END IMPORT OF CACHING LIBRARY            #
#############################################


cache = TTLCache(
    100,  # Up to 100 items
    5 * 60  # 5 minute cache life
)
userCache = TTLCache(
    2,  # Up to 2 items
    60  # 1 minute cache life
)


class UnknownUserException(Exception):
    """Raised by AssertUser when the filtered user list is empty."""
    pass


def WAAuth(parser):
    """Add the positional ``apollo``, ``username`` and ``password`` arguments
    to an argparse parser."""
    parser.add_argument('apollo', help='Complete Apollo URL')
    parser.add_argument('username', help='WA Username')
    parser.add_argument('password', help='WA Password')


def OrgOrGuess(parser):
    """Add the three alternative ways of naming an organism
    (``--org_json``, ``--org_raw``, ``--org_id``) to an argparse parser."""
    parser.add_argument('--org_json', type=argparse.FileType("r"), help='Apollo JSON output, source for common name')
    parser.add_argument('--org_raw', help='Common Name')
    parser.add_argument('--org_id', help='Organism ID')


def CnOrGuess(parser):
    """Add the organism options plus the sequence options
    (``--seq_fasta``, ``--seq_raw``) to an argparse parser."""
    OrgOrGuess(parser)
    parser.add_argument('--seq_fasta', type=argparse.FileType("r"), help='Fasta file, IDs used as sequence sources')
    parser.add_argument('--seq_raw', nargs='*', help='Sequence Names')


def GuessOrg(args, wa):
    """Return a list of organism common names from the parsed arguments.

    Sources are tried in order: ``args.org_json`` (every ``commonName`` found
    in the JSON list), ``args.org_raw`` (stripped), then ``args.org_id``
    (looked up through ``wa.organisms.findOrganismById``).

    Raises:
        Exception: if no source is set, or ``org_raw`` is only whitespace.
    """
    if args.org_json:
        orgs = [x.get('commonName', None)
                for x in json.load(args.org_json)]
        orgs = [x for x in orgs if x is not None]
        return orgs
    elif args.org_raw:
        org = args.org_raw.strip()
        if len(org) > 0:
            return [org]
        else:
            raise Exception("Organism Common Name not provided")
    elif args.org_id:
        return [wa.organisms.findOrganismById(args.org_id).get('commonName', None)]
    else:
        raise Exception("Organism Common Name not provided")
def GuessCn(args, wa):
    """Return ``(organisms, sequences)`` derived from the parsed arguments.

    Organisms come from ``GuessOrg``. Sequence names are the record ids of
    ``args.seq_fasta`` when it is set, otherwise the non-blank, stripped
    entries of ``args.seq_raw``, otherwise an empty list.
    """
    org = GuessOrg(args, wa)
    seqs = []
    if args.seq_fasta:
        # If we have a fasta, pull all rec ids from that.
        seqs = [rec.id for rec in SeqIO.parse(args.seq_fasta, 'fasta')]
    elif args.seq_raw:
        # Otherwise raw list.
        seqs = [x.strip() for x in args.seq_raw if len(x.strip()) > 0]

    return org, seqs


def AssertUser(user_list):
    """Return the single element of ``user_list``.

    Raises:
        UnknownUserException: if the list is empty.
        Exception: if it holds more than one user.
    """
    if not user_list:
        raise UnknownUserException()
    if len(user_list) == 1:
        return user_list[0]
    raise Exception("Too many users!")


def AssertAdmin(user):
    """Return True if ``user.role`` is ``'ADMIN'``; raise Exception otherwise."""
    if user.role == 'ADMIN':
        return True
    raise Exception("User is not an administrator. Permission denied")


class WebApolloInstance(object):
    """Holds the Apollo URL and credentials and one client object per API
    area (annotations, groups, io, organisms, users, ...)."""

    def __init__(self, url, username, password):
        self.apollo_url = url
        self.username = username
        self.password = password

        self.annotations = AnnotationsClient(self)
        self.groups = GroupsClient(self)
        self.io = IOClient(self)
        self.organisms = OrganismsClient(self)
        self.users = UsersClient(self)
        self.metrics = MetricsClient(self)
        self.bio = RemoteRecord(self)
        self.status = StatusClient(self)
        self.canned_comments = CannedCommentsClient(self)
        self.canned_keys = CannedKeysClient(self)
        self.canned_values = CannedValuesClient(self)

    def __str__(self):
        # The format string had lost its angle-bracketed text, leaving
        # ``'' % url`` which raises TypeError.
        return '<WebApolloInstance at %s>' % self.apollo_url

    def requireUser(self, email):
        """Return the user whose ``username`` equals ``email``.

        The full user list is kept in ``userCache`` under one key and is
        re-requested through ``self.users.loadUsers()`` on a cache miss.
        Raises whatever ``AssertUser`` raises for zero or several matches.
        """
        cacheKey = 'user-list'
        try:
            # Get the cached value
            data = userCache[cacheKey]
        except KeyError:
            # If we hit a key error above, indicating that
            # we couldn't find the key, we'll simply re-request
            # the data
            data = self.users.loadUsers()
            userCache[cacheKey] = data

        return AssertUser([x for x in data if x.username == email])


class GroupObj(object):
    """A group built from keyword data: ``name`` is required, ``id`` is
    optional and stored as ``groupId``."""

    def __init__(self, **kwargs):
        self.name = kwargs['name']

        if 'id' in kwargs:
            self.groupId = kwargs['id']


class UserObj(object):
    """A user built from keyword data; every keyword becomes an attribute."""

    ROLE_USER = 'USER'
    ROLE_ADMIN = 'ADMIN'

    def __init__(self, **kwargs):
        # Generally expect 'userId', 'firstName', 'lastName', 'username' (email)
        for attr, value in kwargs.items():
            setattr(self, attr, value)

        if 'groups' in kwargs:
            # Replace the raw group dicts with GroupObj instances.
            self.groups = [GroupObj(**groupData)
                           for groupData in kwargs['groups']]

        # Remember which attributes came from the constructor for toDict().
        self.__props = list(kwargs.keys())

    def isAdmin(self):
        """Return True if a ``role`` attribute exists and equals ROLE_ADMIN."""
        if hasattr(self, 'role'):
            return self.role == self.ROLE_ADMIN
        return False

    def refresh(self, wa):
        """Reload this user through ``wa.users.loadUser`` and copy the
        returned properties onto ``self``."""
        # This method requires some sleeping usually.
        newU = wa.users.loadUser(self).toDict()
        for prop in newU:
            setattr(self, prop, newU[prop])

    def toDict(self):
        """Return the constructor-supplied properties with their current
        values."""
        return {prop: getattr(self, prop) for prop in self.__props}

    def orgPerms(self):
        """Yield organism permission entries, decoding ``permissions`` from
        JSON in place."""
        # NOTE(review): the source's indentation is lost; the yield is assumed
        # to sit inside the ``if`` (entries whose permissions value has
        # length <= 2, e.g. the string "[]", are skipped) -- confirm upstream.
        for orgPer in self.organismPermissions:
            if len(orgPer['permissions']) > 2:
                orgPer['permissions'] = json.loads(orgPer['permissions'])
                yield orgPer

    def __str__(self):
        # The format string had lost its angle-bracketed text, leaving
        # ``'>' % (...)`` which raises TypeError.
        return '<User %s: %s %s <%s>>' % (self.userId, self.firstName,
                                          self.lastName, self.username)


class Client(object):
    """Base class for the API clients; subclasses set ``CLIENT_BASE`` to the
    URL path prefix of their controller."""

    def __init__(self, webapolloinstance, **requestArgs):
        self._wa = webapolloinstance

        # ``verify`` is passed to requests explicitly, so take it out of the
        # pass-through keyword arguments.
        self.__verify = requestArgs.get('verify', True)
        self._requestArgs = requestArgs

        if 'verify' in self._requestArgs:
            del self._requestArgs['verify']

    @staticmethod
    def _scrub(d):
        """Drop 'username'/'password' keys from a decoded JSON object.

        Non-dict values (e.g. lists) are returned untouched.
        """
        if isinstance(d, dict):
            d.pop('username', None)
            d.pop('password', None)
        return d

    def request(self, clientMethod, data, post_params=None, isJson=True):
        """POST ``data`` plus the instance credentials as JSON.

        Args:
            clientMethod: path appended to ``apollo_url + CLIENT_BASE``.
            data: dict of request fields; it is not modified.
            post_params: optional query-string parameters.
            isJson: decode the response as JSON (credentials removed) when
                True, otherwise return the response text.

        Raises:
            Exception: for any status code other than 200 or 302.
        """
        url = self._wa.apollo_url + self.CLIENT_BASE + clientMethod

        headers = {
            'Content-Type': 'application/json'
        }

        # Work on a copy so the credentials never end up in the caller's dict.
        payload = dict(data)
        payload.update({
            'username': self._wa.username,
            'password': self._wa.password,
        })

        r = requests.post(url, data=json.dumps(payload), headers=headers,
                          verify=self.__verify,
                          params={} if post_params is None else post_params,
                          allow_redirects=False, **self._requestArgs)

        if r.status_code == 200 or r.status_code == 302:
            if isJson:
                return self._scrub(r.json())
            else:
                return r.text

        # @see self.body for HTTP response body
        raise Exception("Unexpected response from apollo %s: %s" %
                        (r.status_code, r.text))

    def get(self, clientMethod, get_params):
        """GET ``apollo_url + CLIENT_BASE + clientMethod`` and return the
        decoded JSON with credentials removed.

        Raises:
            Exception: for any status code other than 200.
        """
        url = self._wa.apollo_url + self.CLIENT_BASE + clientMethod
        headers = {}

        r = requests.get(url, headers=headers, verify=self.__verify,
                         params=get_params, **self._requestArgs)
        if r.status_code == 200:
            return self._scrub(r.json())
        # @see self.body for HTTP response body
        raise Exception("Unexpected response from apollo %s: %s" %
                        (r.status_code, r.text))


class MetricsClient(Client):
    CLIENT_BASE = '/metrics/'

    def getServerMetrics(self):
        """GET the ``metrics`` endpoint."""
        return self.get('metrics', {})


class AnnotationsClient(Client):
    CLIENT_BASE = '/annotationEditor/'

    def _update_data(self, data):
        """Merge the sequence/organism set by setSequence into ``data``.

        Raises:
            Exception: if setSequence has not been called yet.
        """
        if not hasattr(self, '_extra_data'):
            raise Exception("Please call setSequence first")

        data.update(self._extra_data)
        return data

    def setSequence(self, sequence, organism):
        """Remember the sequence and organism sent with later requests."""
        self._extra_data = {
            'sequence': sequence,
            'organism': organism,
        }

    def setDescription(self, featureDescriptions):
        data = {
            'features': featureDescriptions,
        }
        data = self._update_data(data)
        return self.request('setDescription', data)

    def setName(self, uniquename, name):
        # TODO
        data = {
            'features': [
                {
                    'uniquename': uniquename,
                    'name': name,
                }
            ],
        }
        data = self._update_data(data)
        return self.request('setName', data)

    def setNames(self, features):
        # TODO
        data = {
            'features': features,
        }
        data = self._update_data(data)
        return self.request('setName', data)

    def setStatus(self, statuses):
        # TODO
        data = {
            'features': statuses,
        }
        data = self._update_data(data)
        return self.request('setStatus', data)

    def setSymbol(self, symbols):
        data = {
            'features': symbols,
        }
        # Go through _update_data like the sibling methods so a missing
        # setSequence call gives the explanatory error, not AttributeError.
        data = self._update_data(data)
        return self.request('setSymbol', data)

    def getComments(self, feature_id):
        data = {
            'features': [{'uniquename': feature_id}],
        }
        data = self._update_data(data)
        return self.request('getComments', data)

def addComments(self, feature_id, comments): + # TODO: This is probably not great and will delete comments, if I had to guess... + data = { + 'features': [ + { + 'uniquename': feature_id, + 'comments': comments + } + ], + } + data = self._update_data(data) + return self.request('addComments', data) + + def addAttributes(self, feature_id, attributes): + nrps = [] + for (key, values) in attributes.items(): + for value in values: + nrps.append({ + 'tag': key, + 'value': value + }) + + data = { + 'features': [ + { + 'uniquename': feature_id, + 'non_reserved_properties': nrps + } + ] + } + data = self._update_data(data) + return self.request('addAttribute', data) + + def deleteAttribute(self, feature_id, key, value): + data = { + 'features': [ + { + 'uniquename': feature_id, + 'non_reserved_properties': [ + {'tag': key, 'value': value} + ] + } + ] + } + data = self._update_data(data) + return self.request('addAttribute', data) + + def getFeatures(self): + data = self._update_data({}) + return self.request('getFeatures', data) + + def getSequence(self, uniquename): + data = { + 'features': [ + {'uniquename': uniquename} + ] + } + data = self._update_data(data) + return self.request('getSequence', data) + + def addFeature(self, feature, trustme=False): + if not trustme: + raise NotImplementedError("Waiting on better docs from project. If you know what you are doing, pass trustme=True to this function.") + + data = { + 'features': feature, + } + data = self._update_data(data) + return self.request('addFeature', data) + + def addTranscript(self, transcript, trustme=False): + if not trustme: + raise NotImplementedError("Waiting on better docs from project. 
If you know what you are doing, pass trustme=True to this function.") + + data = {} + data.update(transcript) + data = self._update_data(data) + return self.request('addTranscript', data) + + # addExon, add/delete/updateComments, addTranscript skipped due to docs + + def duplicateTranscript(self, transcriptId): + data = { + 'features': [{'uniquename': transcriptId}] + } + + data = self._update_data(data) + return self.request('duplicateTranscript', data) + + def setTranslationStart(self, uniquename, start): + data = { + 'features': [{ + 'uniquename': uniquename, + 'location': { + 'fmin': start + } + }] + } + data = self._update_data(data) + return self.request('setTranslationStart', data) + + def setTranslationEnd(self, uniquename, end): + data = { + 'features': [{ + 'uniquename': uniquename, + 'location': { + 'fmax': end + } + }] + } + data = self._update_data(data) + return self.request('setTranslationEnd', data) + + def setLongestOrf(self, uniquename): + data = { + 'features': [{ + 'uniquename': uniquename, + }] + } + data = self._update_data(data) + return self.request('setLongestOrf', data) + + def setBoundaries(self, uniquename, start, end): + data = { + 'features': [{ + 'uniquename': uniquename, + 'location': { + 'fmin': start, + 'fmax': end, + } + }] + } + data = self._update_data(data) + return self.request('setBoundaries', data) + + def getSequenceAlterations(self): + data = { + } + data = self._update_data(data) + return self.request('getSequenceAlterations', data) + + def setReadthroughStopCodon(self, uniquename): + data = { + 'features': [{ + 'uniquename': uniquename, + }] + } + data = self._update_data(data) + return self.request('setReadthroughStopCodon', data) + + def deleteSequenceAlteration(self, uniquename): + data = { + 'features': [{ + 'uniquename': uniquename, + }] + } + data = self._update_data(data) + return self.request('deleteSequenceAlteration', data) + + def flipStrand(self, uniquenames): + data = { + 'features': [ + {'uniquename': x} 
for x in uniquenames + ] + } + data = self._update_data(data) + return self.request('flipStrand', data) + + def mergeExons(self, exonA, exonB): + data = { + 'features': [ + {'uniquename': exonA}, + {'uniquename': exonB}, + ] + } + data = self._update_data(data) + return self.request('mergeExons', data) + + # def splitExon(): pass + + def deleteFeatures(self, uniquenames): + assert isinstance(uniquenames, collections.Iterable) + data = { + 'features': [ + {'uniquename': x} for x in uniquenames + ] + } + data = self._update_data(data) + return self.request('deleteFeature', data) + + # def deleteExon(): pass + + # def makeIntron(self, uniquename, ): pass + + def getSequenceSearchTools(self): + return self.get('getSequenceSearchTools', {}) + + def getCannedComments(self): + return self.get('getCannedComments', {}) + + def searchSequence(self, searchTool, sequence, database): + data = { + 'key': searchTool, + 'residues': sequence, + 'database_id': database, + } + return self.request('searchSequences', data) + + def getGff3(self, uniquenames): + assert isinstance(uniquenames, collections.Iterable) + data = { + 'features': [ + {'uniquename': x} for x in uniquenames + ] + } + data = self._update_data(data) + return self.request('getGff3', data, isJson=False) + + +class GroupsClient(Client): + CLIENT_BASE = '/group/' + + def createGroup(self, name): + data = {'name': name} + return self.request('createGroup', data) + + def getOrganismPermissionsForGroup(self, group): + data = { + 'id': group.groupId, + 'name': group.name, + } + return self.request('getOrganismPermissionsForGroup', data) + + def loadGroup(self, group): + return self.loadGroupById(group.groupId) + + def loadGroupById(self, groupId): + res = self.request('loadGroups', {'groupId': groupId}) + if isinstance(res, list): + # We can only match one, right? 
+ return GroupObj(**res[0]) + else: + return res + + def loadGroupByName(self, name): + res = self.request('loadGroups', {'name': name}) + if isinstance(res, list): + # We can only match one, right? + return GroupObj(**res[0]) + else: + return res + + def loadGroups(self, group=None): + res = self.request('loadGroups', {}) + data = [GroupObj(**x) for x in res] + if group is not None: + data = [x for x in data if x.name == group] + + return data + + def deleteGroup(self, group): + data = { + 'id': group.groupId, + 'name': group.name, + } + return self.request('deleteGroup', data) + + def updateGroup(self, group, newName): + # TODO: Sure would be nice if modifying ``group.name`` would invoke + # this? + data = { + 'id': group.groupId, + 'name': newName, + } + return self.request('updateGroup', data) + + def updateOrganismPermission(self, group, organismName, + administrate=False, write=False, read=False, + export=False): + data = { + 'groupId': group.groupId, + 'organism': organismName, + 'ADMINISTRATE': administrate, + 'WRITE': write, + 'EXPORT': export, + 'READ': read, + } + return self.request('updateOrganismPermission', data) + + def updateMembership(self, group, users): + data = { + 'groupId': group.groupId, + 'user': [user.email for user in users] + } + return self.request('updateMembership', data) + + +class IOClient(Client): + CLIENT_BASE = '/IOService/' + + def write(self, exportType='FASTA', seqType='peptide', + exportFormat='text', sequences=None, organism=None, + output='text', exportAllSequences=False, + exportGff3Fasta=False): + if exportType not in ('FASTA', 'GFF3'): + raise Exception("exportType must be one of FASTA, GFF3") + + if seqType not in ('peptide', 'cds', 'cdna', 'genomic'): + raise Exception("seqType must be one of peptide, cds, dna, genomic") + + if exportFormat not in ('gzip', 'text'): + raise Exception("exportFormat must be one of gzip, text") + + if output not in ('file', 'text'): + raise Exception("output must be one of file, text") + + 
data = { + 'type': exportType, + 'seqType': seqType, + 'format': exportFormat, + 'sequences': sequences, + 'organism': organism, + 'output': output, + 'exportAllSequences': exportAllSequences, + 'exportGff3Fasta': exportGff3Fasta, + } + + return self.request('write', data, isJson=output == 'file') + + def download(self, uuid, outputFormat='gzip'): + + if outputFormat.lower() not in ('gzip', 'text'): + raise Exception("outputFormat must be one of file, text") + + data = { + 'format': outputFormat, + 'uuid': uuid, + } + return self.request('write', data) + + +class StatusClient(Client): + CLIENT_BASE = '/availableStatus/' + + def addStatus(self, value): + data = { + 'value': value + } + + return self.request('createStatus', data) + + def findAllStatuses(self): + return self.request('showStatus', {}) + + def findStatusByValue(self, value): + statuses = self.findAllStatuses() + statuses = [x for x in statuses if x['value'] == value] + if len(statuses) == 0: + raise Exception("Unknown status value") + else: + return statuses[0] + + def findStatusById(self, id_number): + statuses = self.findAllStatuses() + statuses = [x for x in statuses if str(x['id']) == str(id_number)] + if len(statuses) == 0: + raise Exception("Unknown ID") + else: + return statuses[0] + + def updateStatus(self, id_number, new_value): + data = { + 'id': id_number, + 'new_value': new_value + } + + return self.request('updateStatus', data) + + def deleteStatus(self, id_number): + data = { + 'id': id_number + } + + return self.request('deleteStatus', data) + + +class CannedCommentsClient(Client): + CLIENT_BASE = '/cannedComment/' + + def addComment(self, comment, metadata=""): + data = { + 'comment': comment, + 'metadata': metadata + } + + return self.request('createComment', data) + + def findAllComments(self): + return self.request('showComment', {}) + + def findCommentByValue(self, value): + comments = self.findAllComments() + comments = [x for x in comments if x['comment'] == value] + if 
len(comments) == 0: + raise Exception("Unknown comment") + else: + return comments[0] + + def findCommentById(self, id_number): + comments = self.findAllComments() + comments = [x for x in comments if str(x['id']) == str(id_number)] + if len(comments) == 0: + raise Exception("Unknown ID") + else: + return comments[0] + + def updateComment(self, id_number, new_value, metadata=None): + data = { + 'id': id_number, + 'new_comment': new_value + } + + if metadata is not None: + data['metadata'] = metadata + + return self.request('updateComment', data) + + def deleteComment(self, id_number): + data = { + 'id': id_number + } + + return self.request('deleteComment', data) + + +class CannedKeysClient(Client): + CLIENT_BASE = '/cannedKey/' + + def addKey(self, key, metadata=""): + data = { + 'key': key, + 'metadata': metadata + } + + return self.request('createKey', data) + + def findAllKeys(self): + return self.request('showKey', {}) + + def findKeyByValue(self, value): + keys = self.findAllKeys() + keys = [x for x in keys if x['label'] == value] + if len(keys) == 0: + raise Exception("Unknown key") + else: + return keys[0] + + def findKeyById(self, id_number): + keys = self.findAllKeys() + keys = [x for x in keys if str(x['id']) == str(id_number)] + if len(keys) == 0: + raise Exception("Unknown ID") + else: + return keys[0] + + def updateKey(self, id_number, new_key, metadata=None): + data = { + 'id': id_number, + 'new_key': new_key + } + + if metadata is not None: + data['metadata'] = metadata + + return self.request('updateKey', data) + + def deleteKey(self, id_number): + data = { + 'id': id_number + } + + return self.request('deleteKey', data) + + +class CannedValuesClient(Client): + CLIENT_BASE = '/cannedValue/' + + def addValue(self, value, metadata=""): + data = { + 'value': value, + 'metadata': metadata + } + + return self.request('createValue', data) + + def findAllValues(self): + return self.request('showValue', {}) + + def findValueByValue(self, value): + values = 
self.findAllValues() + values = [x for x in values if x['label'] == value] + if len(values) == 0: + raise Exception("Unknown value") + else: + return values[0] + + def findValueById(self, id_number): + values = self.findAllValues() + values = [x for x in values if str(x['id']) == str(id_number)] + if len(values) == 0: + raise Exception("Unknown ID") + else: + return values[0] + + def updateValue(self, id_number, new_value, metadata=None): + data = { + 'id': id_number, + 'new_value': new_value + } + + if metadata is not None: + data['metadata'] = metadata + + return self.request('updateValue', data) + + def deleteValue(self, id_number): + data = { + 'id': id_number + } + + return self.request('deleteValue', data) + + +class OrganismsClient(Client): + CLIENT_BASE = '/organism/' + + def addOrganism(self, commonName, directory, blatdb=None, species=None, + genus=None, public=False): + data = { + 'commonName': commonName, + 'directory': directory, + 'publicMode': public, + } + + if blatdb is not None: + data['blatdb'] = blatdb + if genus is not None: + data['genus'] = genus + if species is not None: + data['species'] = species + + return self.request('addOrganism', data) + + def findAllOrganisms(self): + return self.request('findAllOrganisms', {}) + + def findOrganismByCn(self, cn): + orgs = self.findAllOrganisms() + orgs = [x for x in orgs if x['commonName'] == cn] + if len(orgs) == 0: + raise Exception("Unknown common name") + else: + return orgs[0] + + def findOrganismById(self, id_number): + orgs = self.findAllOrganisms() + orgs = [x for x in orgs if str(x['id']) == str(id_number)] + if len(orgs) == 0: + raise Exception("Unknown ID") + else: + return orgs[0] + + def deleteOrganism(self, organismId): + return self.request('deleteOrganism', {'id': organismId}) + + def deleteOrganismFeatures(self, organismId): + return self.request('deleteOrganismFeatures', {'id': organismId}) + + def getSequencesForOrganism(self, commonName): + return 
self.request('getSequencesForOrganism', {'organism': commonName}) + + def updateOrganismInfo(self, organismId, commonName, directory, blatdb=None, species=None, genus=None, public=False): + data = { + 'id': organismId, + 'name': commonName, + 'directory': directory, + 'publicMode': public, + } + + if blatdb is not None: + data['blatdb'] = blatdb + if genus is not None: + data['genus'] = genus + if species is not None: + data['species'] = species + + return self.request('updateOrganismInfo', data) + + +class UsersClient(Client): + CLIENT_BASE = '/user/' + + # Real one + # def getOrganismPermissionsForUser(self, user): + # data = { + # 'userId': user.userId, + # } + # return self.request('getOrganismPermissionsForUser', data) + + # Utter frigging hack + def getOrganismPermissionsForUser(self, user): + return self.loadUser(user).organismPermissions + + def updateOrganismPermission(self, user, organism, administrate=False, + write=False, export=False, read=False): + data = { + 'userId': user.userId, + 'organism': organism, + 'ADMINISTRATE': administrate, + 'WRITE': write, + 'EXPORT': export, + 'READ': read, + } + return self.request('updateOrganismPermission', data) + + def loadUser(self, user): + return self.loadUserById(user.userId) + + def loadUserById(self, userId): + res = self.request('loadUsers', {'userId': userId}) + if isinstance(res, list): + # We can only match one, right? 
+ return UserObj(**res[0]) + else: + return res + + def loadUsers(self, email=None): + res = self.request('loadUsers', {}) + data = [UserObj(**x) for x in res] + if email is not None: + data = [x for x in data if x.username == email] + + return data + + def addUserToGroup(self, group, user): + data = {'group': group.name, 'userId': user.userId} + return self.request('addUserToGroup', data) + + def removeUserFromGroup(self, group, user): + data = {'group': group.name, 'userId': user.userId} + return self.request('removeUserFromGroup', data) + + def createUser(self, email, firstName, lastName, newPassword, role="user", groups=None): + data = { + 'firstName': firstName, + 'lastName': lastName, + 'email': email, + 'role': role, + 'groups': [] if groups is None else groups, + # 'availableGroups': [], + 'newPassword': newPassword, + # 'organismPermissions': [], + } + return self.request('createUser', data) + + def deleteUser(self, user): + return self.request('deleteUser', {'userId': user.userId}) + + def updateUser(self, user, email, firstName, lastName, newPassword): + data = { + 'userId': user.userId, + 'email': email, + 'firstName': firstName, + 'lastName': lastName, + 'newPassword': newPassword, + } + return self.request('updateUser', data) + + +class RemoteRecord(Client): + CLIENT_BASE = None + + def ParseRecord(self, cn): + org = self._wa.organisms.findOrganismByCn(cn) + self._wa.annotations.setSequence(org['commonName'], org['id']) + + data = io.StringIO(self._wa.io.write( + exportType='GFF3', + seqType='genomic', + exportAllSequences=False, + exportGff3Fasta=True, + output="text", + exportFormat="text", + sequences=cn, + )) + data.seek(0) + + for record in GFF.parse(data): + yield WebApolloSeqRecord(record, self._wa) + + +class WebApolloSeqRecord(object): + def __init__(self, sr, wa): + self._sr = sr + self._wa = wa + + def __dir__(self): + return dir(self._sr) + + def __getattr__(self, key): + if key in ('_sr', '_wa'): + return self.__dict__[key] + else: + if 
key == 'features': + return (WebApolloSeqFeature(x, self._wa) + for x in self._sr.__dict__[key]) + else: + return self._sr.__dict__[key] + + def __setattr__(self, key, value): + if key in ('_sd', '_wa'): + self.__dict__[key] = value + else: + self._sr.__dict__[key] = value + # Methods acting on the SeqRecord object + + +class WebApolloSeqFeature(object): + def __init__(self, sf, wa): + self._sf = sf + self._wa = wa + + def __dir__(self): + return dir(self._sf) + + def __getattr__(self, key): + if key in ('_sf', '_wa'): + return self.__dict__[key] + else: + return self._sf.__dict__[key] + + def __setattr__(self, key, value): + if key in ('_sf', '_wa'): + self.__dict__[key] = value + else: + # Methods acting on the SeqFeature object + if key == 'location': + if value.strand != self._sf.location.strand: + self.wa.annotations.flipStrand( + self._sf.qualifiers['ID'][0] + ) + + self.wa.annotations.setBoundaries( + self._sf.qualifiers['ID'][0], + value.start, + value.end, + ) + + self._sf.__dict__[key] = value + else: + self._sf.__dict__[key] = value + + +def _tnType(feature): + if feature.type in ('gene', 'mRNA', 'exon', 'CDS', 'terminator', 'tRNA'): + return feature.type + else: + return 'exon' + + +def _yieldFeatData(features): + for f in features: + current = { + 'location': { + 'strand': f.strand, + 'fmin': int(f.location.start), + 'fmax': int(f.location.end), + }, + 'type': { + 'name': _tnType(f), + 'cv': { + 'name': 'sequence', + } + }, + } + if f.type in ('gene', 'mRNA'): + current['name'] = f.qualifiers.get('Name', [f.id])[0] + if hasattr(f, 'sub_features') and len(f.sub_features) > 0: + current['children'] = [x for x in _yieldFeatData(f.sub_features)] + + yield current + + +def featuresToFeatureSchema(features): + compiled = [] + for feature in features: + # if feature.type != 'gene': + # log.warn("Not able to handle %s features just yet...", feature.type) + # continue + + for x in _yieldFeatData([feature]): + compiled.append(x) + return compiled + + +def 
accessible_organisms(user, orgs): + permissionMap = { + x['organism']: x['permissions'] + for x in user.organismPermissions + if 'WRITE' in x['permissions'] or + 'READ' in x['permissions'] or + 'ADMINISTRATE' in x['permissions'] or + user.role == 'ADMIN' + } + + if 'error' in orgs: + raise Exception("Error received from Apollo server: \"%s\"" % orgs['error']) + + return [ + (org['commonName'], org['id'], False) + for org in sorted(orgs, key=lambda x: x['commonName']) + if org['commonName'] in permissionMap + ] + + +def galaxy_list_groups(trans, *args, **kwargs): + email = trans.get_user().email + wa = WebApolloInstance( + os.environ['GALAXY_WEBAPOLLO_URL'], + os.environ['GALAXY_WEBAPOLLO_USER'], + os.environ['GALAXY_WEBAPOLLO_PASSWORD'] + ) + # Assert that the email exists in apollo + try: + gx_user = wa.requireUser(email) + except UnknownUserException: + return [] + + # Key for cached data + cacheKey = 'groups-' + email + # We don't want to trust "if key in cache" because between asking and fetch + # it might through key error. + if cacheKey not in cache: + # However if it ISN'T there, we know we're safe to fetch + put in + # there. + data = _galaxy_list_groups(wa, gx_user, *args, **kwargs) + cache[cacheKey] = data + return data + try: + # The cache key may or may not be in the cache at this point, it + # /likely/ is. However we take no chances that it wasn't evicted between + # when we checked above and now, so we reference the object from the + # cache in preparation to return. + data = cache[cacheKey] + return data + except KeyError: + # If access fails due to eviction, we will fail over and can ensure that + # data is inserted. + data = _galaxy_list_groups(wa, gx_user, *args, **kwargs) + cache[cacheKey] = data + return data + + +def _galaxy_list_groups(wa, gx_user, *args, **kwargs): + # Fetch the groups. 
+ group_data = [] + for group in wa.groups.loadGroups(): + # Reformat + group_data.append((group.name, group.groupId, False)) + return group_data + + +def galaxy_list_orgs(trans, *args, **kwargs): + email = trans.get_user().email + wa = WebApolloInstance( + os.environ['GALAXY_WEBAPOLLO_URL'], + os.environ['GALAXY_WEBAPOLLO_USER'], + os.environ['GALAXY_WEBAPOLLO_PASSWORD'] + ) + try: + gx_user = wa.requireUser(email) + except UnknownUserException: + return [] + + # Key for cached data + cacheKey = 'orgs-' + email + if cacheKey not in cache: + data = _galaxy_list_orgs(wa, gx_user, *args, **kwargs) + cache[cacheKey] = data + return data + try: + data = cache[cacheKey] + return data + except KeyError: + data = _galaxy_list_orgs(wa, gx_user, *args, **kwargs) + cache[cacheKey] = data + return data + + +def _galaxy_list_orgs(wa, gx_user, *args, **kwargs): + # Fetch all organisms + all_orgs = wa.organisms.findAllOrganisms() + # Figure out which are accessible to the user + orgs = accessible_organisms(gx_user, all_orgs) + # Return org list + return orgs + + +def galaxy_list_users(trans, *args, **kwargs): + email = trans.get_user().email + wa = WebApolloInstance( + os.environ['GALAXY_WEBAPOLLO_URL'], + os.environ['GALAXY_WEBAPOLLO_USER'], + os.environ['GALAXY_WEBAPOLLO_PASSWORD'] + ) + # Assert that the email exists in apollo + try: + gx_user = wa.requireUser(email) + except UnknownUserException: + return [] + + # Key for cached data + cacheKey = 'users-' + email + # We don't want to trust "if key in cache" because between asking and fetch + # it might through key error. + if cacheKey not in cache: + # However if it ISN'T there, we know we're safe to fetch + put in + # there. + data = _galaxy_list_users(wa, gx_user, *args, **kwargs) + cache[cacheKey] = data + return data + try: + # The cache key may or may not be in the cache at this point, it + # /likely/ is. 
However we take no chances that it wasn't evicted between + # when we checked above and now, so we reference the object from the + # cache in preparation to return. + data = cache[cacheKey] + return data + except KeyError: + # If access fails due to eviction, we will fail over and can ensure that + # data is inserted. + data = _galaxy_list_users(wa, gx_user, *args, **kwargs) + cache[cacheKey] = data + return data + + +def _galaxy_list_users(wa, gx_user, *args, **kwargs): + # Fetch the users. + user_data = [] + for user in wa.users.loadUsers(): + # Reformat + user_data.append((user.username, user.username, False)) + return user_data + + +# This is all for implementing the command line interface for testing. +class obj(object): + pass + + +class fakeTrans(object): + + def __init__(self, username): + self.un = username + + def get_user(self): + o = obj() + o.email = self.un + return o + + +def retry(closure, sleep=1, limit=5): + """ + Apollo has the bad habit of returning 500 errors if you call APIs + too quickly, largely because of the unholy things that happen in + grails. + + To deal with the fact that we cannot send an addComments call too + quickly after a createFeature call, we have this function that will + keep calling a closure until it works. 
+ """ + count = 0 + while True: + count += 1 + + if count >= limit: + return False + try: + # Try calling it + closure() + # If successful, exit + return True + except Exception as e: + log.info(str(e)[0:100]) + time.sleep(sleep) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Test access to apollo server') + parser.add_argument('email', help='Email of user to test') + parser.add_argument('--action', choices=['org', 'group', 'users'], default='org', help='Data set to test, fetch a list of groups or users known to the requesting user.') + args = parser.parse_args() + + trans = fakeTrans(args.email) + if args.action == 'org': + for f in galaxy_list_orgs(trans): + print(f) + elif args.action == 'group': + for f in galaxy_list_groups(trans): + print(f) + else: + for f in galaxy_list_users(trans): + print(f)