Mercurial > repos > earlhaminst > gafa
diff GAFA.py @ 4:117fc7414307 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 651fae48371f845578753052c6fe173e3bb35670
author | earlhaminst |
---|---|
date | Wed, 15 Mar 2017 20:20:58 -0400 |
parents | e17a3470c70a |
children | b9f1bcf5ee59 |
line wrap: on
line diff
--- a/GAFA.py Fri Mar 03 07:20:23 2017 -0500 +++ b/GAFA.py Wed Mar 15 20:20:58 2017 -0400 @@ -1,12 +1,12 @@ from __future__ import print_function import collections -import json import optparse import re +import shutil import sqlite3 -version = "0.2.0" +version = "0.3.0" Sequence = collections.namedtuple('Sequence', ['header', 'sequence']) @@ -50,29 +50,19 @@ def create_tables(conn): cur = conn.cursor() - cur.execute('PRAGMA foreign_keys = ON') - cur.execute('''CREATE TABLE meta ( - version VARCHAR)''') - - cur.execute('INSERT INTO meta (version) VALUES (?)', + # Check that the version of the input database is compatible + cur.execute('SELECT version FROM meta') + result = cur.fetchone() + input_meta_version = result[0] + if input_meta_version != '0.3.0': + raise Exception("Incompatible input meta version '%s'" % input_meta_version) + cur.execute('UPDATE meta SET version=?', (version, )) cur.execute('''CREATE TABLE gene_family ( gene_family_id INTEGER PRIMARY KEY, gene_tree VARCHAR NOT NULL)''') - cur.execute('''CREATE TABLE gene ( - gene_id VARCHAR PRIMARY KEY NOT NULL, - gene_symbol VARCHAR, - gene_json VARCHAR NOT NULL)''') - cur.execute('CREATE INDEX gene_symbol_index ON gene (gene_symbol)') - - cur.execute('''CREATE TABLE transcript ( - transcript_id VARCHAR PRIMARY KEY NOT NULL, - protein_id VARCHAR UNIQUE, - protein_sequence VARCHAR, - gene_id VARCHAR NOT NULL REFERENCES gene(gene_id))''') - cur.execute('''CREATE TABLE gene_family_member ( gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id), protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id), @@ -117,44 +107,23 @@ conn.commit() -def gene_json_to_db(conn, fname): - with open(fname) as f: - all_genes_dict = json.load(f) - - cur = conn.cursor() - for gene_dict in all_genes_dict.values(): - gene_id = gene_dict['id'] - gene_symbol = gene_dict.get('display_name', None) - cur.execute("INSERT INTO gene (gene_id, gene_symbol, gene_json) VALUES (?, ?, ?)", - (gene_id, gene_symbol, json.dumps(gene_dict))) - - if "Transcript" in gene_dict: - for transcript in gene_dict["Transcript"]: - transcript_id = transcript['id'] - if 'Translation' in transcript and 'id' in transcript['Translation']: - protein_id = transcript["Translation"]["id"] - else: - protein_id = None - cur.execute("INSERT INTO transcript (transcript_id, protein_id, gene_id) VALUES (?, ?, ?)", - (transcript_id, protein_id, gene_id)) - conn.commit() - - def __main__(): parser = optparse.OptionParser() parser.add_option('-t', '--tree', action='append', help='Gene tree files') parser.add_option('-a', '--align', action='append', help='Protein alignments in fasta_aln format') - parser.add_option('-g', '--gene', help='Gene features file in JSON format') + parser.add_option('-g', '--gene', help='Gene features file in SQLite format') parser.add_option('-o', '--output', help='Path of the output file') options, args = parser.parse_args() if args: raise Exception('Use options to provide inputs') + if options.gene != options.output: + shutil.copyfile(options.gene, options.output) + conn = sqlite3.connect(options.output) + conn.execute('PRAGMA foreign_keys = ON') create_tables(conn) - gene_json_to_db(conn, options.gene) - for i, (tree, align) in enumerate(zip(options.tree, options.align), start=1): newicktree_to_db(conn, i, tree) align_to_db(conn, i, align)