diff create_features_from_gff3.py @ 10:c3b5bc8b4080 draft

"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 08015be1ee8a784e0619f961aaa724857debfd6f"
author gga
date Mon, 02 Dec 2019 05:43:20 -0500
parents 950fb2bf116d
children b763acecfb57
line wrap: on
line diff
--- a/create_features_from_gff3.py	Mon Jul 29 10:08:57 2019 -0400
+++ b/create_features_from_gff3.py	Mon Dec 02 05:43:20 2019 -0500
@@ -1,21 +1,19 @@
 #!/usr/bin/env python
 import argparse
 import logging
-import sys
-import time
+
+from apollo import accessible_organisms
+from apollo.util import GuessOrg, OrgOrGuess
 
-from BCBio import GFF
+from arrow.apollo import get_apollo_instance
 
-from six.moves.builtins import str
-
-from webapollo import GuessOrg, OrgOrGuess, PermissionCheck, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry
+from webapollo import UserObj, handle_credentials
 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger(__name__)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services')
-    WAAuth(parser)
     parser.add_argument('email', help='User Email')
     parser.add_argument('--source', help='URL where the input dataset can be found.')
     OrgOrGuess(parser)
@@ -23,165 +21,25 @@
     parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
     args = parser.parse_args()
 
-    wa = WebApolloInstance(args.apollo, args.username, args.password)
+    wa = get_apollo_instance()
     # User must have an account
-    gx_user = wa.users.assertOrCreateUser(args.email)
+    gx_user = UserObj(**wa.users._assert_or_create_user(args.email))
+    handle_credentials(gx_user)
 
     # Get organism
     org_cn = GuessOrg(args, wa)
     if isinstance(org_cn, list):
         org_cn = org_cn[0]
 
-    if not PermissionCheck(gx_user, org_cn, "WRITE"):
-        raise Exception("Action not permitted")
-    org = wa.organisms.findOrganismByCn(org_cn)
-
-    bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID']
-
-    sys.stdout.write('# ')
-    sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages']))
-    sys.stdout.write('\n')
-    # print(wa.annotations.getFeatures())
-    for rec in GFF.parse(args.gff3):
-        wa.annotations.setSequence(rec.id, org['id'])
-        for feature in rec.features:
-            # We can only handle genes right now
-            if feature.type not in ('gene', 'terminator'):
-                continue
-            # Convert the feature into a presentation that Apollo will accept
-            featureData = featuresToFeatureSchema([feature])
-            if 'children' in featureData[0] and any([child['type']['name'] == 'tRNA' for child in featureData[0]['children']]):
-                # We're experiencing a (transient?) problem where gene_001 to
-                # gene_025 will be rejected. Thus, hardcode to a known working
-                # gene name and update later.
-
-                featureData[0]['name'] = 'tRNA_000'
-                tRNA_sf = [child for child in feature.sub_features if child.type == 'tRNA'][0]
-                tRNA_type = 'tRNA-' + tRNA_sf.qualifiers.get('Codon', ["Unk"])[0]
-
-                if 'Name' in feature.qualifiers:
-                    if feature.qualifiers['Name'][0].startswith('tRNA-'):
-                        tRNA_type = feature.qualifiers['Name'][0]
-
-                newfeature = wa.annotations.addFeature(featureData, trustme=True)
-
-                def func0():
-                    wa.annotations.setName(
-                        newfeature['features'][0]['uniquename'],
-                        tRNA_type,
-                    )
-                retry(func0)
-
-                if args.source:
-                    gene_id = newfeature['features'][0]['parent_id']
-
-                    def setSource():
-                        wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]})
-                    retry(setSource)
-
-                sys.stdout.write('\t'.join([
-                    feature.id,
-                    newfeature['features'][0]['uniquename'],
-                    'success',
-                ]))
-            elif featureData[0]['type']['name'] == 'terminator':
-                # We're experiencing a (transient?) problem where gene_001 to
-                # gene_025 will be rejected. Thus, hardcode to a known working
-                # gene name and update later.
-                featureData[0]['name'] = 'terminator_000'
-                newfeature = wa.annotations.addFeature(featureData, trustme=True)
-
-                def func0():
-                    wa.annotations.setName(
-                        newfeature['features'][0]['uniquename'],
-                        'terminator'
-                    )
-
-                retry(func0)
-
-                if args.source:
-                    gene_id = newfeature['features'][0]['parent_id']
-
-                    def setSource():
-                        wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]})
-                    retry(setSource)
+    all_orgs = wa.organisms.get_organisms()
+    if 'error' in all_orgs:
+        all_orgs = []
+    all_orgs = [org['commonName'] for org in all_orgs]
+    if org_cn not in all_orgs:
+        raise Exception("Could not find organism %s" % org_cn)
 
-                sys.stdout.write('\t'.join([
-                    feature.id,
-                    newfeature['features'][0]['uniquename'],
-                    'success',
-                ]))
-            else:
-                try:
-                    # We're experiencing a (transient?) problem where gene_001 to
-                    # gene_025 will be rejected. Thus, hardcode to a known working
-                    # gene name and update later.
-                    featureData[0]['name'] = 'gene_000'
-                    # Extract CDS feature from the feature data, this will be used
-                    # to set the CDS location correctly (apollo currently screwing
-                    # this up (2.0.6))
-                    CDS = featureData[0]['children'][0]['children']
-                    CDS = [x for x in CDS if x['type']['name'] == 'CDS'][0]['location']
-                    # Create the new feature
-                    newfeature = wa.annotations.addFeature(featureData, trustme=True)
-                    # Extract the UUIDs that apollo returns to us
-                    mrna_id = newfeature['features'][0]['uniquename']
-                    gene_id = newfeature['features'][0]['parent_id']
-                    # Sleep to give it time to actually persist the feature. Apollo
-                    # is terrible about writing + immediately reading back written
-                    # data.
-                    time.sleep(1)
-                    # Correct the translation start, but with strand specific log
-                    if CDS['strand'] == 1:
-                        wa.annotations.setTranslationStart(mrna_id, min(CDS['fmin'], CDS['fmax']))
-                    else:
-                        wa.annotations.setTranslationStart(mrna_id, max(CDS['fmin'], CDS['fmax']) - 1)
-
-                    # Finally we set the name, this should be correct.
-                    time.sleep(0.5)
-                    wa.annotations.setName(mrna_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0])
-                    time.sleep(0.5)
-
-                    def func():
-                        wa.annotations.setName(gene_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0])
-                    retry(func)
+    orgs = accessible_organisms(gx_user, [org_cn], 'WRITE')
+    if not orgs:
+        raise Exception("You do not have write permission on this organism")
 
-                    if args.source:
-                        gene_id = newfeature['features'][0]['parent_id']
-
-                        def setSource():
-                            wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]})
-                        retry(setSource)
-                    extra_attr = {}
-                    for (key, values) in feature.qualifiers.items():
-                        if key in bad_quals:
-                            continue
-
-                        if key == 'Note':
-                            def func2():
-                                wa.annotations.addComments(gene_id, values)
-                            retry(func2)
-                        else:
-                            extra_attr[key] = values
-
-                    def func3():
-                        wa.annotations.addAttributes(gene_id, extra_attr)
-                    retry(func3)
-
-                    sys.stdout.write('\t'.join([
-                        feature.id,
-                        gene_id,
-                        'success',
-                    ]))
-                except Exception as e:
-                    msg = str(e)
-                    if '\n' in msg:
-                        msg = msg[0:msg.index('\n')]
-                    sys.stdout.write('\t'.join([
-                        feature.id,
-                        '',
-                        'ERROR',
-                        msg
-                    ]))
-            sys.stdout.write('\n')
-            sys.stdout.flush()
+    wa.annotations.load_gff3(org_cn, args.gff3, args.source)