Next changeset 1:46c766c7baa7 (2017-09-19) |
Commit message:
planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit f745b23c84a615bf434d717c8c0e553a012f0268 |
added:
README.rst create_account.py create_features_from_gff3.py create_features_from_gff3.xml create_or_update_organism.py delete_features.py delete_organism.py export.py fetch_organism_jbrowse.py json2iframe.py list_organisms.py macros.xml test-data/bad-model.gff3 test-data/fake.json test-data/good-model.gff3 webapollo.py |
b |
diff -r 000000000000 -r 5aa3bc8d0253 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Sep 11 05:45:37 2017 -0400 |
b |
@@ -0,0 +1,39 @@ +Galaxy-apollo +============= + +Galaxy tools to interface with Apollo. The webapollo.py file is also +`separately +available <https://github.com/galaxy-genome-annotation/python-apollo>`__ +as a pip-installable package. + +Environ + +The following environment variables must be set: + ++--------------------------------+-----------------------------------------------------------+ +| ENV | Use | ++================================+===========================================================+ +| ``$GALAXY_WEBAPOLLO_URL`` | The URL at which Apollo is accessible, internal to Galaxy | +| | and where the tools run. Must be absolute, with FQDN and | +| | protocol. | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_WEBAPOLLO_USER`` | The admin user which Galaxy should use to talk to Apollo. | +| | | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_WEBAPOLLO_PASSWORD`` | The password for the admin user. | +| | | +| | | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_WEBAPOLLO_EXT_URL`` | The external URL at which Apollo is accessible to end | +| | users. May be relative or absolute. | +| | | ++--------------------------------+-----------------------------------------------------------+ +| ``$GALAXY_SHARED_DIR`` | Directory shared between Galaxy and Apollo, used to | +| | exchange JBrowse instances. | ++--------------------------------+-----------------------------------------------------------+ + +License +------- + +All python scripts, wrappers, and the webapollo.py are licensed under +MIT license. |
b |
diff -r 000000000000 -r 5aa3bc8d0253 create_account.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_account.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,44 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import random +import time + +from builtins import range, str + +from webapollo import WAAuth, WebApolloInstance + + +def pwgen(length): + chars = list('qwrtpsdfghjklzxcvbnm') + return ''.join(random.choice(chars) for _ in range(length)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an account via web services') + WAAuth(parser) + + parser.add_argument('email', help='User Email') + parser.add_argument('--first', help='First Name', default='Jane') + parser.add_argument('--last', help='Last Name', default='Aggie') + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + + password = pwgen(12) + time.sleep(1) + users = wa.users.loadUsers() + user = [u for u in users + if u.username == args.email] + + if len(user) == 1: + # Update name, regen password if the user ran it again + userObj = user[0] + returnData = wa.users.updateUser(userObj, args.email, args.first, args.last, password) + print('Updated User\nUsername: %s\nPassword: %s' % (args.email, password)) + else: + returnData = wa.users.createUser(args.email, args.first, args.last, password, role='user') + print('Created User\nUsername: %s\nPassword: %s' % (args.email, password)) + + print("Return data: " + str(returnData)) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 create_features_from_gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_features_from_gff3.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
b'@@ -0,0 +1,186 @@\n+#!/usr/bin/env python\n+import argparse\n+import logging\n+import sys\n+import time\n+\n+from builtins import str\n+\n+from BCBio import GFF\n+\n+from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry\n+logging.basicConfig(level=logging.INFO)\n+log = logging.getLogger(__name__)\n+\n+\n+if __name__ == \'__main__\':\n+ parser = argparse.ArgumentParser(description=\'Sample script to add an attribute to a feature via web services\')\n+ WAAuth(parser)\n+ parser.add_argument(\'email\', help=\'User Email\')\n+ parser.add_argument(\'--source\', help=\'URL where the input dataset can be found.\')\n+ OrgOrGuess(parser)\n+\n+ parser.add_argument(\'gff3\', type=argparse.FileType(\'r\'), help=\'GFF3 file\')\n+ args = parser.parse_args()\n+\n+ wa = WebApolloInstance(args.apollo, args.username, args.password)\n+ # User must have an account\n+ gx_user = AssertUser(wa.users.loadUsers(email=args.email))\n+\n+ # Get organism\n+ org_cn = GuessOrg(args, wa)\n+ if isinstance(org_cn, list):\n+ org_cn = org_cn[0]\n+\n+ # TODO: Check user perms on org.\n+ org = wa.organisms.findOrganismByCn(org_cn)\n+\n+ bad_quals = [\'date_creation\', \'source\', \'owner\', \'date_last_modified\', \'Name\', \'ID\']\n+\n+ sys.stdout.write(\'# \')\n+ sys.stdout.write(\'\\t\'.join([\'Feature ID\', \'Apollo ID\', \'Success\', \'Messages\']))\n+ sys.stdout.write(\'\\n\')\n+ # print(wa.annotations.getFeatures())\n+ for rec in GFF.parse(args.gff3):\n+ wa.annotations.setSequence(rec.id, org[\'id\'])\n+ for feature in rec.features:\n+ # We can only handle genes right now\n+ if feature.type not in (\'gene\', \'terminator\'):\n+ continue\n+ # Convert the feature into a presentation that Apollo will accept\n+ featureData = featuresToFeatureSchema([feature])\n+ if \'children\' in featureData[0] and any([child[\'type\'][\'name\'] == \'tRNA\' for child in featureData[0][\'children\']]):\n+ # We\'re experiencing a (transient?) 
problem where gene_001 to\n+ # gene_025 will be rejected. Thus, hardcode to a known working\n+ # gene name and update later.\n+\n+ featureData[0][\'name\'] = \'tRNA_000\'\n+ tRNA_sf = [child for child in feature.sub_features if child.type == \'tRNA\'][0]\n+ tRNA_type = \'tRNA-\' + tRNA_sf.qualifiers.get(\'Codon\', ["Unk"])[0]\n+\n+ if \'Name\' in feature.qualifiers:\n+ if feature.qualifiers[\'Name\'][0].startswith(\'tRNA-\'):\n+ tRNA_type = feature.qualifiers[\'Name\'][0]\n+\n+ newfeature = wa.annotations.addFeature(featureData, trustme=True)\n+\n+ def func0():\n+ wa.annotations.setName(\n+ newfeature[\'features\'][0][\'uniquename\'],\n+ tRNA_type,\n+ )\n+ retry(func0)\n+\n+ if args.source:\n+ gene_id = newfeature[\'features\'][0][\'parent_id\']\n+\n+ def setSource():\n+ wa.annotations.addAttributes(gene_id, {\'DatasetSource\': [args.source]})\n+ retry(setSource)\n+\n+ sys.stdout.write(\'\\t\'.join([\n+ feature.id,\n+ newfeature[\'features\'][0][\'uniquename\'],\n+ \'success\',\n+ ]))\n+ elif featureData[0][\'type\'][\'name\'] == \'terminator\':\n+ # We\'re experiencing a (transient?) problem where gene_001 to\n+ # gene_025 will be rejected. Thus, hardcode to a known working\n+ # gene name and update later.\n+ featureData[0][\'name\'] = \'terminator_000\'\n+ newfeature = wa.annotations.addFeature(featureData, trustme=True)\n+\n+ def func0():\n+ wa.annotations.setName(\n+ newfeature[\'features\'][0][\'uniquename\'],\n+ \'termin'..b' setSource():\n+ wa.annotations.addAttributes(gene_id, {\'DatasetSource\': [args.source]})\n+ retry(setSource)\n+\n+ sys.stdout.write(\'\\t\'.join([\n+ feature.id,\n+ newfeature[\'features\'][0][\'uniquename\'],\n+ \'success\',\n+ ]))\n+ else:\n+ try:\n+ # We\'re experiencing a (transient?) problem where gene_001 to\n+ # gene_025 will be rejected. 
Thus, hardcode to a known working\n+ # gene name and update later.\n+ featureData[0][\'name\'] = \'gene_000\'\n+ # Extract CDS feature from the feature data, this will be used\n+ # to set the CDS location correctly (apollo currently screwing\n+ # this up (2.0.6))\n+ CDS = featureData[0][\'children\'][0][\'children\']\n+ CDS = [x for x in CDS if x[\'type\'][\'name\'] == \'CDS\'][0][\'location\']\n+ # Create the new feature\n+ newfeature = wa.annotations.addFeature(featureData, trustme=True)\n+ # Extract the UUIDs that apollo returns to us\n+ mrna_id = newfeature[\'features\'][0][\'uniquename\']\n+ gene_id = newfeature[\'features\'][0][\'parent_id\']\n+ # Sleep to give it time to actually persist the feature. Apollo\n+ # is terrible about writing + immediately reading back written\n+ # data.\n+ time.sleep(1)\n+ # Correct the translation start, but with strand specific log\n+ if CDS[\'strand\'] == 1:\n+ wa.annotations.setTranslationStart(mrna_id, min(CDS[\'fmin\'], CDS[\'fmax\']))\n+ else:\n+ wa.annotations.setTranslationStart(mrna_id, max(CDS[\'fmin\'], CDS[\'fmax\']) - 1)\n+\n+ # Finally we set the name, this should be correct.\n+ time.sleep(0.5)\n+ wa.annotations.setName(mrna_id, feature.qualifiers.get(\'product\', feature.qualifiers.get(\'Name\', ["Unknown"]))[0])\n+ time.sleep(0.5)\n+\n+ def func():\n+ wa.annotations.setName(gene_id, feature.qualifiers.get(\'product\', feature.qualifiers.get(\'Name\', ["Unknown"]))[0])\n+ retry(func)\n+\n+ if args.source:\n+ gene_id = newfeature[\'features\'][0][\'parent_id\']\n+\n+ def setSource():\n+ wa.annotations.addAttributes(gene_id, {\'DatasetSource\': [args.source]})\n+ retry(setSource)\n+ extra_attr = {}\n+ for (key, values) in feature.qualifiers.items():\n+ if key in bad_quals:\n+ continue\n+\n+ if key == \'Note\':\n+ def func2():\n+ wa.annotations.addComments(gene_id, values)\n+ retry(func2)\n+ else:\n+ extra_attr[key] = values\n+\n+ def func3():\n+ wa.annotations.addAttributes(gene_id, extra_attr)\n+ 
retry(func3)\n+\n+ sys.stdout.write(\'\\t\'.join([\n+ feature.id,\n+ gene_id,\n+ \'success\',\n+ ]))\n+ except Exception as e:\n+ msg = str(e)\n+ if \'\\n\' in msg:\n+ msg = msg[0:msg.index(\'\\n\')]\n+ sys.stdout.write(\'\\t\'.join([\n+ feature.id,\n+ \'\',\n+ \'ERROR\',\n+ msg\n+ ]))\n+ sys.stdout.write(\'\\n\')\n+ sys.stdout.flush()\n' |
b |
diff -r 000000000000 -r 5aa3bc8d0253 create_features_from_gff3.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_features_from_gff3.xml Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,50 @@ +<?xml version="1.0"?> +<tool id="feat_from_gff3" name="GFF3 to Apollo Annotations" version="0.9"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <code file="webapollo.py"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/create_features_from_gff3.py +@ADMIN_AUTH@ +@ORG_OR_GUESS@ + +"$__user_email__" +'$gff3_data' +--source "${__app__.config.galaxy_infrastructure_url}history/view/${__app__.security.encode_id($gff3_data.history_id)}" + +> $output]]></command> + <inputs> + <expand macro="org_or_guess" /> + <expand macro="gff3_input" /> + </inputs> + <outputs> + <data format="tabular" name="output" label="Process and Error Log"/> + </outputs> + <tests> + <test expect_failure="true"> + <conditional name="org_source"> + <param name="source_select" value="direct"/> + <param name="org_raw" value="Test org" /> + </conditional> + <param name="gff3_data" value="good-model.gff3"/> + <expand macro="test_result" /> + </test> + </tests> + <help><![CDATA[ +**NOTA BENE** + +This is **incredibly, highly experimental** + +DO NOT: + +- Run on gff3 referencing multiple reference sequences/contigs +- Expect it to work well +- Expect it to work at all + +@REFERENCES@ +]]></help> + <expand macro="citations"/> +</tool> |
b |
diff -r 000000000000 -r 5aa3bc8d0253 create_or_update_organism.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/create_or_update_organism.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,102 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json +import logging +import shutil +import sys +import time + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Create or update an organism in an Apollo instance') + WAAuth(parser) + + parser.add_argument('jbrowse', help='JBrowse Data Directory') + parser.add_argument('email', help='User Email') + OrgOrGuess(parser) + parser.add_argument('--genus', help='Organism Genus') + parser.add_argument('--species', help='Organism Species') + parser.add_argument('--public', action='store_true', help='Make organism public') + parser.add_argument('--group', help='Give access to a user group') + parser.add_argument('--remove_old_directory', action='store_true', help='Remove old directory') + + args = parser.parse_args() + wa = WebApolloInstance(args.apollo, args.username, args.password) + + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + log.info("Determining if add or update required") + try: + org = wa.organisms.findOrganismByCn(org_cn) + except Exception: + org = None + + if org: + has_perms = False + old_directory = org.directory + for user_owned_organism in gx_user.organismPermissions: + if 'WRITE' in user_owned_organism['permissions']: + has_perms = True + break + + if not has_perms: + print("Naming Conflict. You do not have permissions to access this organism. 
Either request permission from the owner, or choose a different name for your organism.") + sys.exit(2) + + log.info("\tUpdating Organism") + data = wa.organisms.updateOrganismInfo( + org['id'], + org_cn, + args.jbrowse, + # mandatory + genus=args.genus, + species=args.species, + public=args.public + ) + time.sleep(2) + if(args.remove_old_directory): + shutil.rmtree(old_directory) + + data = [wa.organisms.findOrganismById(org['id'])] + + else: + # New organism + log.info("\tAdding Organism") + data = wa.organisms.addOrganism( + org_cn, + args.jbrowse, + genus=args.genus, + species=args.species, + public=args.public + ) + + # Must sleep before we're ready to handle + time.sleep(2) + log.info("Updating permissions for %s on %s", gx_user, org_cn) + wa.users.updateOrganismPermission( + gx_user, org_cn, + write=True, + export=True, + read=True, + ) + + # Group access + if args.group: + group = wa.groups.loadGroupByName(name=args.group) + res = wa.groups.updateOrganismPermission(group, org_cn, + administrate=False, write=True, read=True, + export=True) + + data = [o for o in data if o['commonName'] == org_cn] + print(json.dumps(data, indent=2)) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 delete_features.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delete_features.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,66 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import logging +import random + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance, retry +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to delete all features from an organism') + WAAuth(parser) + parser.add_argument('email', help='User Email') + parser.add_argument('--type', help='Feature type filter') + OrgOrGuess(parser) + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + # Get organism + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # TODO: Check user perms on org. + org = wa.organisms.findOrganismByCn(org_cn) + + sequences = wa.organisms.getSequencesForOrganism(org['id']) + for sequence in sequences['sequences']: + log.info("Processing %s %s", org['commonName'], sequence['name']) + # Call setSequence to tell apollo which organism we're working with + wa.annotations.setSequence(sequence['name'], org['id']) + # Then get a list of features. + features = wa.annotations.getFeatures() + # For each feature in the features + for feature in sorted(features['features'], key=lambda x: random.random()): + if args.type: + if args.type == 'tRNA': + if feature['type']['name'] != 'tRNA': + continue + + elif args.type == 'terminator': + if feature['type']['name'] != 'terminator': + continue + + elif args.type == 'mRNA': + if feature['type']['name'] != 'mRNA': + continue + + else: + raise Exception("Unknown type") + + # We see that deleteFeatures wants a uniqueName, and so we pass + # is the uniquename field in the feature. 
+ def fn(): + wa.annotations.deleteFeatures([feature['uniquename']]) + print('Deleted %s [type=%s]' % (feature['uniquename'], feature['type']['name'])) + + if not retry(fn, limit=3): + print('Error %s' % feature['uniquename']) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 delete_organism.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/delete_organism.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,40 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import logging + +from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to completely delete an organism') + WAAuth(parser) + parser.add_argument('email', help='User Email') + OrgOrGuess(parser) + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + + # Get organism + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + org_cn = org_cn[0] + + # TODO: Check user perms on org. + org = wa.organisms.findOrganismByCn(org_cn) + + # Call setSequence to tell apollo which organism we're working with + wa.annotations.setSequence(org['commonName'], org['id']) + # Then get a list of features. + features = wa.annotations.getFeatures() + # For each feature in the features + for feature in features['features']: + # We see that deleteFeatures wants a uniqueName, and so we pass + # is the uniquename field in the feature. + print(wa.annotations.deleteFeatures([feature['uniquename']])) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 export.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/export.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,88 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json +import sys + +from BCBio import GFF + +from Bio import SeqIO + +from future import standard_library + +from webapollo import CnOrGuess, GuessCn, WAAuth, WebApolloInstance + +standard_library.install_aliases() +try: + import StringIO as io +except ImportError: + import io + + +def export(org_cn, seqs): + org_data = wa.organisms.findOrganismByCn(org_cn) + + data = io.StringIO() + + kwargs = dict( + exportType='GFF3', + seqType='genomic', + exportGff3Fasta=True, + output="text", + exportFormat="text", + organism=org_cn, + ) + + if len(seqs) > 0: + data.write(wa.io.write( + exportAllSequences=False, + sequences=seqs, + **kwargs + ).encode('utf-8')) + else: + data.write(wa.io.write( + exportAllSequences=True, + sequences=[], + **kwargs + ).encode('utf-8')) + + # Seek back to start + data.seek(0) + + records = list(GFF.parse(data)) + if len(records) == 0: + print("Could not find any sequences or annotations for this organism + reference sequence") + sys.exit(2) + else: + for record in records: + record.annotations = {} + record.features = sorted(record.features, key=lambda x: x.location.start) + if args.gff: + GFF.write([record], args.gff) + record.description = "" + if args.fasta: + SeqIO.write([record], args.fasta, 'fasta') + + return org_data + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + WAAuth(parser) + CnOrGuess(parser) + parser.add_argument('--gff', type=argparse.FileType('w')) + parser.add_argument('--fasta', type=argparse.FileType('w')) + parser.add_argument('--json', type=argparse.FileType('w')) + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + + org_cn_list, seqs = GuessCn(args, wa) + + org_data = [] + for org_cn in org_cn_list: + indiv_org_data = export(org_cn, seqs) + 
org_data.append(indiv_org_data) + args.json.write(json.dumps(org_data, indent=2)) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 fetch_organism_jbrowse.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fetch_organism_jbrowse.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,113 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import filecmp +import logging +import os +import subprocess +import sys +import time + +from webapollo import GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +def are_dir_trees_equal(dir1, dir2): + """ + Compare two directories recursively. Files in each directory are + assumed to be equal if their names and contents are equal. + + @param dir1: First directory path + @param dir2: Second directory path + + @return: True if the directory trees are the same and + there were no errors while accessing the directories or files, + False otherwise. + + # http://stackoverflow.com/questions/4187564/recursive-dircmp-compare-two-directories-to-ensure-they-have-the-same-files-and/6681395#6681395 + """ + + dirs_cmp = filecmp.dircmp(dir1, dir2) + if len(dirs_cmp.left_only) > 0 or len(dirs_cmp.right_only) > 0 or \ + len(dirs_cmp.funny_files) > 0: + print(('LEFT', dirs_cmp.left_only)) + print(('RIGHT', dirs_cmp.right_only)) + print(('FUNNY', dirs_cmp.funny_files)) + return False + (_, mismatch, errors) = filecmp.cmpfiles( + dir1, dir2, dirs_cmp.common_files, shallow=False) + if len(mismatch) > 0 or len(errors) > 0: + print(mismatch) + print(errors) + return False + for common_dir in dirs_cmp.common_dirs: + new_dir1 = os.path.join(dir1, common_dir) + new_dir2 = os.path.join(dir2, common_dir) + if not are_dir_trees_equal(new_dir1, new_dir2): + return False + return True + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + WAAuth(parser) + OrgOrGuess(parser) + parser.add_argument('target_dir', help='Target directory') + + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + # User must have an account + org_cn = GuessOrg(args, wa) + if isinstance(org_cn, list): + 
org_cn = org_cn[0] + org = wa.organisms.findOrganismByCn(org_cn) + + if not os.path.exists(args.target_dir): + os.makedirs(args.target_dir) + + if not os.path.exists(os.path.join(org['directory'], 'seq')): + sys.stderr.write("Missing seq directory BEFORE copy") + sys.exit(1) + + cmd = [ + 'rsync', '-avr', + org['directory'].rstrip('/') + '/', + os.path.join(args.target_dir, 'data', '') + ] + # We run this OBSESSIVELY because my org had a hiccup where the origin + # (silent) cp -R failed at one point. This caused MANY HEADACHES. + # + # Our response is to run this 3 times (in case the issue is temporary), + # with delays in between. And ensure that we have the correct number of + # files / folders before and after. + sys.stderr.write(' '.join(cmd)) + sys.stderr.write('\n') + sys.stderr.write(subprocess.check_output(cmd)) + if not are_dir_trees_equal( + os.path.join(org['directory'].rstrip('/')), + os.path.join(args.target_dir, 'data') + ): + # Not good + time.sleep(5) + sys.stderr.write('\n') + sys.stderr.write(' '.join(cmd)) + sys.stderr.write('\n') + sys.stderr.write(subprocess.check_output(cmd)) + if not are_dir_trees_equal( + os.path.join(org['directory'].rstrip('/'), 'data'), + os.path.join(args.target_dir, 'data') + ): + time.sleep(5) + sys.stderr.write('\n') + sys.stderr.write(' '.join(cmd)) + sys.stderr.write('\n') + sys.stderr.write(subprocess.check_output(cmd)) + if not are_dir_trees_equal( + os.path.join(org['directory'].rstrip('/'), 'data'), + os.path.join(args.target_dir, 'data') + ): + sys.stderr.write('FAILED THREE TIMES TO COPY. SOMETHING IS WRONG WRONG WRONG.') + sys.exit(2) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 json2iframe.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/json2iframe.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,32 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + parser.add_argument('apollo', help='Complete Apollo URL') + parser.add_argument('json', type=argparse.FileType("r"), help='JSON Data') + parser.add_argument('external_apollo_url') + + args = parser.parse_args() + + # https://fqdn/apollo/annotator/loadLink?loc=NC_005880:0..148317&organism=326&tracks= + data = json.load(args.json) + + # This is base64 encoded to get past the toolshed's filters. + HTML_TPL = """ + <html> + <head> + <title>Embedded Apollo Access</title> + <style type="text/css">body {{margin: 0;}} iframe {{border: 0;width: 100%;height: 100%}}</style> + </head> + <body> + <iframe src="{base_url}/annotator/loadLink?loc={chrom}&organism={orgId}&tracklist=1"></iframe> + </body> + </html> + """ + + print(HTML_TPL.format(base_url=args.external_apollo_url, chrom="", orgId=data[0]['id'])) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 list_organisms.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list_organisms.py Mon Sep 11 05:45:37 2017 -0400 |
b |
@@ -0,0 +1,22 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json + +from webapollo import AssertUser, WAAuth, WebApolloInstance, accessible_organisms + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='List all organisms available in an Apollo instance') + WAAuth(parser) + parser.add_argument('email', help='User Email') + args = parser.parse_args() + + wa = WebApolloInstance(args.apollo, args.username, args.password) + + gx_user = AssertUser(wa.users.loadUsers(email=args.email)) + all_orgs = wa.organisms.findAllOrganisms() + + orgs = accessible_organisms(gx_user, all_orgs) + + print(json.dumps(orgs, indent=2)) |
b |
diff -r 000000000000 -r 5aa3bc8d0253 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,129 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="0.6.2">bcbiogff</requirement> + <requirement type="package" version="2.12.4">requests</requirement> + <requirement type="package" version="0.16.0">future</requirement> + <yield/> + </requirements> + </xml> + <token name="@DATA_DIR@">\$GALAXY_SHARED_DIR</token> + <token name="@EXT_URL@"> +"\$GALAXY_WEBAPOLLO_EXT_URL" + </token> + <token name="@URL@"> +"\$GALAXY_WEBAPOLLO_URL" + </token> + <token name="@ADMIN_AUTH@"> +"\$GALAXY_WEBAPOLLO_URL" +"\$GALAXY_WEBAPOLLO_USER" +"\$GALAXY_WEBAPOLLO_PASSWORD" + </token> + + <token name="@ORG_OR_GUESS@"> +<![CDATA[ +#if $org_source.source_select == "auto_json": + --org_json "${org_source.org_file}" +#elif $org_source.source_select == "select": + --org_id "${org_source.org_select}" +#else: + --org_raw "${org_source.org_raw}" +#end if +]]> + </token> + <token name="@ORG_CN_OR_GUESS@"> +<![CDATA[ +@ORG_OR_GUESS@ + +#if $cn_source.source_select == "auto": + #if str($cn_source.cn_file) != "None": + --seq_fasta $cn_source.cn_file + #end if +#else + #if $cn_source.source_select != "all" and len($cn_source.refseqs) > 0: + --seq_raw + #for $item in $cn_source.refseqs: + "${item.refseq}" + #end for + #end if +#end if +]]> + </token> + <xml name="org_or_guess"> + <conditional name="org_source"> + <param name="source_select" type="select" label="Organism Common Name Source"> + <option value="select">Select</option> + <option value="direct">Direct Entry</option> + <option value="auto_json">Autodetect from Apollo JSON</option> + </param> + <when value="select"> + <param name="org_select" type="select" dynamic_options="galaxy_list_orgs(__trans__)" label="Organism" /> + </when> + <when value="direct"> + <param name="org_raw" type="text" label="Organism Common Name" 
optional="False" /> + </when> + <when value="auto_json"> + <param name="org_file" type="data" format="json" label="Apollo Organism File" help="Will only fetch first organism" /> + </when> + </conditional> + </xml> + <xml name="cn_or_guess"> + <expand macro="org_or_guess" /> + <conditional name="cn_source"> + <param name="source_select" type="select" label="Organism Sequence(s) Source"> + <option value="all">All Refseqs</option> + <option value="direct">Direct Entry</option> + <option value="auto">Autodetect from Fasta</option> + </param> + <when value="all"> + </when> + <when value="auto"> + <param name="cn_file" type="data" format="fasta" label="Reference sequence(s)" optional="true"/> + </when> + <when value="direct"> + <repeat name="refseqs" title="Reference Sequences" help="These are used to identify sequences within an organism that you wish to extract"> + <param name="refseq" type="text" label="Reference sequence(s)" /> + </repeat> + </when> + </conditional> + </xml> + + <xml name="test_result"> + <assert_stderr> + <has_text text="MissingSchema" /> + </assert_stderr> + </xml> + + <xml name="citations"> + <citations> + </citations> + </xml> + <xml name="gff3_input"> + <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> + </xml> + <token name="@GENOME_SELECTOR_PRE@"> +#if $reference_genome.reference_genome_source == 'history': + ln -s $reference_genome.genome_fasta genomeref.fa; +#end if + </token> + <token name="@GENOME_SELECTOR@"> +#if $reference_genome.reference_genome_source == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#elif $reference_genome.reference_genome_source == 'history': + genomeref.fa +#end if + </token> +<token name="@REFERENCES@"> +<![CDATA[ +------ + +**Citation** + +If you use this tool in Galaxy, please cite: +Eric Rasche (2016), `Galaxy Apollo Tools <https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo>`_ +]]> + </token> +</macros> |
b |
diff -r 000000000000 -r 5aa3bc8d0253 test-data/bad-model.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bad-model.gff3 Mon Sep 11 05:45:37 2017 -0400 |
b |
@@ -0,0 +1,9 @@ +##gff-version 3 +##sequence-region Maroon_JMcDermott 1 144762 +Maroon_JMcDermott . gene 14488 14805 . + . Name=gene_26;date_creation=2016-02-17;owner=jmc_texas@tamu.edu;ID=707c88b7-36d1-44e3-93e6-d1d4f1219d57;date_last_modified=2016-02-17 +Maroon_JMcDermott . mRNA 14488 14805 . + . Name=gene_26-00001;date_creation=2016-02-17;Parent=707c88b7-36d1-44e3-93e6-d1d4f1219d57;owner=jmc_texas@tamu.edu;ID=8760695d-b88c-41c0-857b-540e6db81fe8;date_last_modified=2016-02-17 +Maroon_JMcDermott . CDS 14707 14805 . + 0 Name=94abf796-4c8d-45f4-916b-4d279616565e-CDS;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=94abf796-4c8d-45f4-916b-4d279616565e +Maroon_JMcDermott . exon 14497 14805 . + . Name=d2ebd8d0-6558-4674-a38f-346f88256340-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=d2ebd8d0-6558-4674-a38f-346f88256340 +Maroon_JMcDermott . exon 14488 14491 . + . Name=2e4119f9-3220-4502-8ddd-4821c872e0d6-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=2e4119f9-3220-4502-8ddd-4821c872e0d6 +Maroon_JMcDermott . non_canonical_five_prime_splice_site 14494 14494 . + . Name=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_five_prime_splice_site-14493;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_five_prime_splice_site-14493 +Maroon_JMcDermott . non_canonical_three_prime_splice_site 14497 14497 . + . Name=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_three_prive_splice_site-14496;Parent=8760695d-b88c-41c0-857b-540e6db81fe8;ID=8760695d-b88c-41c0-857b-540e6db81fe8-non_canonical_three_prive_splice_site-14496 |
b |
diff -r 000000000000 -r 5aa3bc8d0253 test-data/fake.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fake.json Mon Sep 11 05:45:37 2017 -0400 |
[ |
@@ -0,0 +1,1 @@ +[{"id": "fake"}] |
b |
diff -r 000000000000 -r 5aa3bc8d0253 test-data/good-model.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/good-model.gff3 Mon Sep 11 05:45:37 2017 -0400 |
b |
@@ -0,0 +1,7 @@ +##gff-version 3 +##sequence-region Maroon_JMcDermott 1 14805 +Maroon_JMcDermott feature gene 14488 14805 . + . ID=707c88b7-36d1-44e3-93e6-d1d4f1219d57;Name=gene_26;date_creation=2016-02-17;date_last_modified=2016-02-17;owner=jmc_texas%40tamu.edu +Maroon_JMcDermott feature mRNA 14488 14805 . + . ID=8760695d-b88c-41c0-857b-540e6db81fe8;Name=gene_26-00001;Parent=707c88b7-36d1-44e3-93e6-d1d4f1219d57;date_creation=2016-02-17;date_last_modified=2016-02-17;owner=jmc_texas%40tamu.edu +Maroon_JMcDermott feature CDS 14707 14805 . + 0 ID=94abf796-4c8d-45f4-916b-4d279616565e;Name=94abf796-4c8d-45f4-916b-4d279616565e-CDS;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 +Maroon_JMcDermott feature exon 14497 14805 . + . ID=d2ebd8d0-6558-4674-a38f-346f88256340;Name=d2ebd8d0-6558-4674-a38f-346f88256340-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 +Maroon_JMcDermott feature Shine_Dalgarno_sequence 14488 14491 . + . ID=2e4119f9-3220-4502-8ddd-4821c872e0d6;Name=2e4119f9-3220-4502-8ddd-4821c872e0d6-exon;Parent=8760695d-b88c-41c0-857b-540e6db81fe8 |
b |
diff -r 000000000000 -r 5aa3bc8d0253 webapollo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/webapollo.py Mon Sep 11 05:45:37 2017 -0400 |
[ |
b'@@ -0,0 +1,1718 @@\n+from __future__ import print_function\n+\n+import argparse\n+import collections\n+import json\n+import logging\n+import os\n+import time\n+\n+from abc import abstractmethod\n+from builtins import next\n+from builtins import object\n+from builtins import str\n+\n+from BCBio import GFF\n+\n+from Bio import SeqIO\n+\n+from future import standard_library\n+\n+import requests\n+\n+\n+standard_library.install_aliases()\n+try:\n+ import StringIO as io\n+except BaseException:\n+ import io\n+logging.getLogger("requests").setLevel(logging.CRITICAL)\n+log = logging.getLogger()\n+\n+\n+#############################################\n+# BEGIN IMPORT OF CACHING LIBRARY #\n+#############################################\n+# This code is licensed under the MIT #\n+# License and is a copy of code publicly #\n+# available in rev. #\n+# e27332bc82f4e327aedaec17c9b656ae719322ed #\n+# of https://github.com/tkem/cachetools/ #\n+#############################################\n+\n+class DefaultMapping(collections.MutableMapping):\n+\n+ __slots__ = ()\n+\n+ @abstractmethod\n+ def __contains__(self, key): # pragma: nocover\n+ return False\n+\n+ @abstractmethod\n+ def __getitem__(self, key): # pragma: nocover\n+ if hasattr(self.__class__, \'__missing__\'):\n+ return self.__class__.__missing__(self, key)\n+ else:\n+ raise KeyError(key)\n+\n+ def get(self, key, default=None):\n+ if key in self:\n+ return self[key]\n+ else:\n+ return default\n+\n+ __marker = object()\n+\n+ def pop(self, key, default=__marker):\n+ if key in self:\n+ value = self[key]\n+ del self[key]\n+ elif default is self.__marker:\n+ raise KeyError(key)\n+ else:\n+ value = default\n+ return value\n+\n+ def setdefault(self, key, default=None):\n+ if key in self:\n+ value = self[key]\n+ else:\n+ self[key] = value = default\n+ return value\n+\n+\n+DefaultMapping.register(dict)\n+\n+\n+class _DefaultSize(object):\n+ def __getitem__(self, _):\n+ return 1\n+\n+ def __setitem__(self, _, value):\n+ assert value == 
1\n+\n+ def pop(self, _):\n+ return 1\n+\n+\n+class Cache(DefaultMapping):\n+ """Mutable mapping to serve as a simple cache or cache base class."""\n+\n+ __size = _DefaultSize()\n+\n+ def __init__(self, maxsize, missing=None, getsizeof=None):\n+ if missing:\n+ self.__missing = missing\n+ if getsizeof:\n+ self.__getsizeof = getsizeof\n+ self.__size = dict()\n+ self.__data = dict()\n+ self.__currsize = 0\n+ self.__maxsize = maxsize\n+\n+ def __repr__(self):\n+ return \'%s(%r, maxsize=%r, currsize=%r)\' % (\n+ self.__class__.__name__,\n+ list(self.__data.items()),\n+ self.__maxsize,\n+ self.__currsize,\n+ )\n+\n+ def __getitem__(self, key):\n+ try:\n+ return self.__data[key]\n+ except KeyError:\n+ return self.__missing__(key)\n+\n+ def __setitem__(self, key, value):\n+ maxsize = self.__maxsize\n+ size = self.getsizeof(value)\n+ if size > maxsize:\n+ raise ValueError(\'value too large\')\n+ if key not in self.__data or self.__size[key] < size:\n+ while self.__currsize + size > maxsize:\n+ self.popitem()\n+ if key in self.__data:\n+ diffsize = size - self.__size[key]\n+ else:\n+ diffsize = size\n+ self.__data[key] = value\n+ self.__size[key] = size\n+ self.__currsize += diffsize\n+\n+ def __delitem__(self, key):\n+ size = self.__size.pop(key)\n+ del self.__data[key]\n+ self.__currsize -= size\n+\n+ def __contains__(self, key):\n+ return key in self.__data\n+\n+ def __missing__(self, key):\n+ value = self.__missing(key)\n+ try:\n+ self.__setitem__(key, value)\n+ exce'..b'e:\n+ data = _galaxy_list_orgs(wa, gx_user, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+ try:\n+ data = cache[cacheKey]\n+ return data\n+ except KeyError:\n+ data = _galaxy_list_orgs(wa, gx_user, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+\n+\n+def _galaxy_list_orgs(wa, gx_user, *args, **kwargs):\n+ # Fetch all organisms\n+ all_orgs = wa.organisms.findAllOrganisms()\n+ # Figure out which are accessible to the user\n+ orgs = accessible_organisms(gx_user, all_orgs)\n+ # 
Return org list\n+ return orgs\n+\n+\n+def galaxy_list_users(trans, *args, **kwargs):\n+ email = trans.get_user().email\n+ wa = WebApolloInstance(\n+ os.environ[\'GALAXY_WEBAPOLLO_URL\'],\n+ os.environ[\'GALAXY_WEBAPOLLO_USER\'],\n+ os.environ[\'GALAXY_WEBAPOLLO_PASSWORD\']\n+ )\n+ # Assert that the email exists in apollo\n+ try:\n+ gx_user = wa.requireUser(email)\n+ except UnknownUserException:\n+ return []\n+\n+ # Key for cached data\n+ cacheKey = \'users-\' + email\n+ # We don\'t want to trust "if key in cache" because between asking and fetch\n+ # it might through key error.\n+ if cacheKey not in cache:\n+ # However if it ISN\'T there, we know we\'re safe to fetch + put in\n+ # there.\n+ data = _galaxy_list_users(wa, gx_user, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+ try:\n+ # The cache key may or may not be in the cache at this point, it\n+ # /likely/ is. However we take no chances that it wasn\'t evicted between\n+ # when we checked above and now, so we reference the object from the\n+ # cache in preparation to return.\n+ data = cache[cacheKey]\n+ return data\n+ except KeyError:\n+ # If access fails due to eviction, we will fail over and can ensure that\n+ # data is inserted.\n+ data = _galaxy_list_users(wa, gx_user, *args, **kwargs)\n+ cache[cacheKey] = data\n+ return data\n+\n+\n+def _galaxy_list_users(wa, gx_user, *args, **kwargs):\n+ # Fetch the users.\n+ user_data = []\n+ for user in wa.users.loadUsers():\n+ # Reformat\n+ user_data.append((user.username, user.username, False))\n+ return user_data\n+\n+\n+# This is all for implementing the command line interface for testing.\n+class obj(object):\n+ pass\n+\n+\n+class fakeTrans(object):\n+\n+ def __init__(self, username):\n+ self.un = username\n+\n+ def get_user(self):\n+ o = obj()\n+ o.email = self.un\n+ return o\n+\n+\n+def retry(closure, sleep=1, limit=5):\n+ """\n+ Apollo has the bad habit of returning 500 errors if you call APIs\n+ too quickly, largely because of the unholy things 
that happen in\n+ grails.\n+\n+ To deal with the fact that we cannot send an addComments call too\n+ quickly after a createFeature call, we have this function that will\n+ keep calling a closure until it works.\n+ """\n+ count = 0\n+ while True:\n+ count += 1\n+\n+ if count >= limit:\n+ return False\n+ try:\n+ # Try calling it\n+ closure()\n+ # If successful, exit\n+ return True\n+ except Exception as e:\n+ log.info(str(e)[0:100])\n+ time.sleep(sleep)\n+\n+\n+if __name__ == \'__main__\':\n+ parser = argparse.ArgumentParser(description=\'Test access to apollo server\')\n+ parser.add_argument(\'email\', help=\'Email of user to test\')\n+ parser.add_argument(\'--action\', choices=[\'org\', \'group\', \'users\'], default=\'org\', help=\'Data set to test, fetch a list of groups or users known to the requesting user.\')\n+ args = parser.parse_args()\n+\n+ trans = fakeTrans(args.email)\n+ if args.action == \'org\':\n+ for f in galaxy_list_orgs(trans):\n+ print(f)\n+ elif args.action == \'group\':\n+ for f in galaxy_list_groups(trans):\n+ print(f)\n+ else:\n+ for f in galaxy_list_users(trans):\n+ print(f)\n' |