diff GAFA.py @ 4:117fc7414307 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 651fae48371f845578753052c6fe173e3bb35670
author earlhaminst
date Wed, 15 Mar 2017 20:20:58 -0400
parents e17a3470c70a
children b9f1bcf5ee59
line wrap: on
line diff
--- a/GAFA.py	Fri Mar 03 07:20:23 2017 -0500
+++ b/GAFA.py	Wed Mar 15 20:20:58 2017 -0400
@@ -1,12 +1,12 @@
 from __future__ import print_function
 
 import collections
-import json
 import optparse
 import re
+import shutil
 import sqlite3
 
-version = "0.2.0"
+version = "0.3.0"
 
 Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
 
@@ -50,29 +50,19 @@
 
 def create_tables(conn):
     cur = conn.cursor()
-    cur.execute('PRAGMA foreign_keys = ON')
-    cur.execute('''CREATE TABLE meta (
-        version VARCHAR)''')
-
-    cur.execute('INSERT INTO meta (version) VALUES (?)',
+    # Check that the version of the input database is compatible
+    cur.execute('SELECT version FROM meta')
+    result = cur.fetchone()
+    input_meta_version = result[0]
+    if input_meta_version != '0.3.0':
+        raise Exception("Incompatible input meta version '%s'" % input_meta_version)
+    cur.execute('UPDATE meta SET version=?',
                 (version, ))
 
     cur.execute('''CREATE TABLE gene_family (
         gene_family_id INTEGER PRIMARY KEY,
         gene_tree VARCHAR NOT NULL)''')
 
-    cur.execute('''CREATE TABLE gene (
-        gene_id VARCHAR PRIMARY KEY NOT NULL,
-        gene_symbol VARCHAR,
-        gene_json VARCHAR NOT NULL)''')
-    cur.execute('CREATE INDEX gene_symbol_index ON gene (gene_symbol)')
-
-    cur.execute('''CREATE TABLE transcript (
-        transcript_id VARCHAR PRIMARY KEY NOT NULL,
-        protein_id VARCHAR UNIQUE,
-        protein_sequence VARCHAR,
-        gene_id VARCHAR NOT NULL REFERENCES gene(gene_id))''')
-
     cur.execute('''CREATE TABLE gene_family_member (
         gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id),
         protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id),
@@ -117,44 +107,23 @@
     conn.commit()
 
 
-def gene_json_to_db(conn, fname):
-    with open(fname) as f:
-        all_genes_dict = json.load(f)
-
-    cur = conn.cursor()
-    for gene_dict in all_genes_dict.values():
-        gene_id = gene_dict['id']
-        gene_symbol = gene_dict.get('display_name', None)
-        cur.execute("INSERT INTO gene (gene_id, gene_symbol, gene_json) VALUES (?, ?, ?)",
-                    (gene_id, gene_symbol, json.dumps(gene_dict)))
-
-        if "Transcript" in gene_dict:
-            for transcript in gene_dict["Transcript"]:
-                transcript_id = transcript['id']
-                if 'Translation' in transcript and 'id' in transcript['Translation']:
-                    protein_id = transcript["Translation"]["id"]
-                else:
-                    protein_id = None
-                cur.execute("INSERT INTO transcript (transcript_id, protein_id, gene_id) VALUES (?, ?, ?)",
-                            (transcript_id, protein_id, gene_id))
-    conn.commit()
-
-
 def __main__():
     parser = optparse.OptionParser()
     parser.add_option('-t', '--tree', action='append', help='Gene tree files')
     parser.add_option('-a', '--align', action='append', help='Protein alignments in fasta_aln format')
-    parser.add_option('-g', '--gene', help='Gene features file in JSON format')
+    parser.add_option('-g', '--gene', help='Gene features file in SQLite format')
     parser.add_option('-o', '--output', help='Path of the output file')
     options, args = parser.parse_args()
     if args:
         raise Exception('Use options to provide inputs')
 
+    if options.gene != options.output:
+        shutil.copyfile(options.gene, options.output)
+
     conn = sqlite3.connect(options.output)
+    conn.execute('PRAGMA foreign_keys = ON')
     create_tables(conn)
 
-    gene_json_to_db(conn, options.gene)
-
     for i, (tree, align) in enumerate(zip(options.tree, options.align), start=1):
         newicktree_to_db(conn, i, tree)
         align_to_db(conn, i, align)