Mercurial > repos > earlhaminst > gafa
annotate GAFA.py @ 0:af9f72ddf7f9 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
author | earlhaminst |
---|---|
date | Wed, 21 Dec 2016 07:31:50 -0500 |
parents | |
children | fc8ca4ade638 |
rev | line source |
---|---|
0
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
2 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
3 import json |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
4 import optparse |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
5 import sqlite3 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
6 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
7 version = "0.1.0" |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
8 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
9 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
10 def create_tables(conn): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
11 cur = conn.cursor() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
12 cur.execute('PRAGMA foreign_keys = ON') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
13 cur.execute('''CREATE TABLE meta ( |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
14 version VARCHAR)''') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
15 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
16 cur.execute('INSERT INTO meta (version) VALUES (?)', |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
17 (version, )) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
18 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
19 cur.execute('''CREATE TABLE gene_family ( |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
20 gene_family_id INTEGER PRIMARY KEY, |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
21 gene_tree VARCHAR NOT NULL)''') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
22 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
23 cur.execute('''CREATE TABLE gene ( |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
24 gene_id VARCHAR PRIMARY KEY NOT NULL, |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
25 gene_symbol VARCHAR, |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
26 gene_json VARCHAR NOT NULL)''') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
27 cur.execute('CREATE INDEX gene_symbol_index ON gene (gene_symbol)') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
28 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
29 cur.execute('''CREATE TABLE transcript ( |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
30 transcript_id VARCHAR PRIMARY KEY NOT NULL, |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
31 protein_id VARCHAR UNIQUE, |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
32 gene_id VARCHAR NOT NULL REFERENCES gene(gene_id))''') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
33 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
34 cur.execute('''CREATE TABLE gene_family_member ( |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
35 gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id), |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
36 protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id), |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
37 alignment VARCHAR NOT NULL, |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
38 PRIMARY KEY (gene_family_id, protein_id))''') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
39 conn.commit() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
40 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
41 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
42 def cigar_to_db(conn, i, fname): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
43 cur = conn.cursor() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
44 with open(fname) as f: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
45 for element in f.readlines(): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
46 seq_id, cigar = element.rstrip('\n').split('\t') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
47 # Trim seq_id by removing everything from the first underscore |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
48 seq_id = seq_id.split('_', 1)[0] |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
49 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
50 cur.execute('SELECT transcript_id, protein_id FROM transcript WHERE transcript_id=? OR protein_id=?', |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
51 (seq_id, seq_id)) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
52 results = cur.fetchall() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
53 if len(results) == 0: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
54 raise Exception("Sequence id '%s' could not be found among the transcript and protein ids" % seq_id) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
55 elif len(results) > 1: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
56 raise Exception("Searching sequence id '%s' among the transcript and protein ids returned multiple results" % seq_id) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
57 transcript_id, protein_id = results[0] |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
58 if protein_id is None: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
59 print("Skipping transcript '%s' with no protein id" % transcript_id) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
60 else: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
61 cur.execute('INSERT INTO gene_family_member (gene_family_id, protein_id, alignment) VALUES (?, ?, ?)', |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
62 (i, protein_id, cigar)) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
63 conn.commit() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
64 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
65 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
66 def newicktree_to_db(conn, i, fname): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
67 with open(fname) as f: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
68 tree = f.read().replace('\n', '') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
69 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
70 cur = conn.cursor() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
71 cur.execute('INSERT INTO gene_family (gene_family_id, gene_tree) VALUES (?, ?)', |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
72 (i, tree)) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
73 conn.commit() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
74 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
75 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
76 def gene_json_to_db(conn, fname): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
77 with open(fname) as f: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
78 all_genes_dict = json.load(f) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
79 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
80 cur = conn.cursor() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
81 for gene_dict in all_genes_dict.values(): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
82 gene_id = gene_dict['id'] |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
83 gene_symbol = gene_dict.get('display_name', None) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
84 cur.execute("INSERT INTO gene (gene_id, gene_symbol, gene_json) VALUES (?, ?, ?)", |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
85 (gene_id, gene_symbol, json.dumps(gene_dict))) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
86 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
87 if "Transcript" in gene_dict: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
88 for transcript in gene_dict["Transcript"]: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
89 transcript_id = transcript['id'] |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
90 if 'Translation' in transcript and 'id' in transcript['Translation']: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
91 protein_id = transcript["Translation"]["id"] |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
92 else: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
93 protein_id = None |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
94 cur.execute("INSERT INTO transcript (transcript_id, protein_id, gene_id) VALUES (?, ?, ?)", |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
95 (transcript_id, protein_id, gene_id)) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
96 conn.commit() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
97 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
98 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
99 def __main__(): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
100 parser = optparse.OptionParser() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
101 parser.add_option('-t', '--tree', action='append', help='Gene tree files') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
102 parser.add_option('-c', '--cigar', action='append', help='CIGAR alignments of CDS files in tabular format') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
103 parser.add_option('-g', '--gene', help='Gene features file in JSON format') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
104 parser.add_option('-o', '--output', help='Path of the output file') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
105 options, args = parser.parse_args() |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
106 if args: |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
107 raise Exception('Use options to provide inputs') |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
108 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
109 conn = sqlite3.connect(options.output) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
110 create_tables(conn) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
111 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
112 gene_json_to_db(conn, options.gene) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
113 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
114 for i, (tree, cigar) in enumerate(zip(options.tree, options.cigar), start=1): |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
115 newicktree_to_db(conn, i, tree) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
116 cigar_to_db(conn, i, cigar) |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
117 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
118 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
119 if __name__ == '__main__': |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
120 __main__() |