Mercurial > repos > earlhaminst > gafa
annotate GAFA.py @ 7:b9f1bcf5ee59 draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit fa875eea77a9471acada2b7b8882a0467994c960
author | earlhaminst |
---|---|
date | Wed, 25 Apr 2018 10:59:55 -0400 |
parents | 117fc7414307 |
children |
rev | line source |
---|---|
0
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
2 |
1
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
3 import collections |
0
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
4 import optparse |
1
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
5 import re |
4
117fc7414307
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 651fae48371f845578753052c6fe173e3bb35670
earlhaminst
parents:
3
diff
changeset
|
6 import shutil |
0
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
7 import sqlite3 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
8 |
4
117fc7414307
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 651fae48371f845578753052c6fe173e3bb35670
earlhaminst
parents:
3
diff
changeset
|
# Schema version that create_tables() writes into the ``meta`` table of the
# output database.
version = "0.3.0"
# Versions of the input database's ``meta`` table that create_tables()
# accepts; any other value makes it raise.
compatible_version = ['0.3.0', '0.4.0']
1
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
11 |
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
# A single FASTA record: the raw header line and the concatenated sequence.
Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])


def FASTAReader_gen(fasta_filename):
    """
    Lazily parse a FASTA file, yielding one ``Sequence`` per record.

    :param fasta_filename: path of the FASTA file to read
    :return: generator of ``Sequence(header, sequence)`` tuples; ``header`` is
        the header line (leading ``>`` included) with trailing whitespace
        stripped, ``sequence`` is the concatenation of the record's sequence
        lines, each with trailing whitespace stripped
    :raises ValueError: if the file content does not start with a ``>`` header
    """
    with open(fasta_filename) as fasta_file:
        line = fasta_file.readline()
        # An empty string from readline() means end of file
        while line:
            # Explicit check rather than ``assert``: assertions are removed
            # when Python runs with -O, which would let a malformed file be
            # parsed silently.
            if not line.startswith('>'):
                raise ValueError("FASTA headers must start with >")
            header = line.rstrip()
            sequence_parts = []
            line = fasta_file.readline()
            # Collect every line up to the next header (or end of file)
            while line and not line.startswith('>'):
                sequence_parts.append(line.rstrip())
                line = fasta_file.readline()
            yield Sequence(header, "".join(sequence_parts))
1
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
30 |
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
31 |
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
# Matches any single character that is not a gap
FASTA_MATCH_RE = re.compile(r'[^-]')
# Matches one maximal run of identical CIGAR operations
_CIGAR_RUN_RE = re.compile(r'M+|D+')


def fasta_aln2cigar(sequence):
    """
    Convert an aligned sequence into a compact CIGAR string.

    Every non-gap character counts as a match (``M``) and every ``-`` as a
    deletion (``D``). Consecutive identical operations are condensed into
    ``<count><op>``, with the count omitted for runs of length 1
    (e.g. ``AAA--B`` becomes ``3M2DM``).

    :param sequence: aligned sequence string, using ``-`` for gaps
    :return: the CIGAR string; an empty string for an empty sequence
    """
    # Converts each match into M and each gap into D
    ops = FASTA_MATCH_RE.sub('M', sequence).replace('-', 'D')
    # Condense each run, e.g. DDDD in 4D, and concatenate them again
    cigar_parts = []
    for run in _CIGAR_RUN_RE.finditer(ops):
        run_length = run.end() - run.start()
        if run_length > 1:
            cigar_parts.append(str(run_length))
        cigar_parts.append(ops[run.start()])
    return ''.join(cigar_parts)
0
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
50 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
51 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def create_tables(conn):
    """
    Validate the input database version and add the gene family tables.

    Reads the version from the ``meta`` table, checks it against
    ``compatible_version``, updates it to ``version``, creates the
    ``gene_family`` and ``gene_family_member`` tables and commits.

    :param conn: open sqlite3 connection to the database to extend
    :raises Exception: if the ``meta`` table has no row, or if its version is
        not listed in ``compatible_version``
    """
    cur = conn.cursor()
    # Check that the version of the input database is compatible
    cur.execute('SELECT version FROM meta')
    result = cur.fetchone()
    if result is None:
        # fetchone() returns None for an empty table; fail with a clear
        # message instead of a TypeError on result[0]
        raise Exception("No version found in the meta table of the input database")
    input_meta_version = result[0]
    if input_meta_version not in compatible_version:
        raise Exception("Incompatible input meta version '%s'" % input_meta_version)
    cur.execute('UPDATE meta SET version=?',
                (version, ))

    cur.execute('''CREATE TABLE gene_family (
        gene_family_id INTEGER PRIMARY KEY,
        gene_tree VARCHAR NOT NULL)''')

    # NOTE(review): the stray KEY after VARCHAR in the protein_id column looks
    # unintended; left as-is so the created schema stays unchanged - confirm.
    cur.execute('''CREATE TABLE gene_family_member (
        gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id),
        protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id),
        protein_alignment VARCHAR NOT NULL,
        PRIMARY KEY (gene_family_id, protein_id))''')

    conn.commit()
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
74 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
75 |
1
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
earlhaminst
parents:
0
diff
changeset
|
def align_to_db(conn, i, fname):
    """
    Store the protein alignments of one gene family in the database.

    For every record of the FASTA alignment file, the sequence id (header
    without ``>``, cut at the first underscore) is looked up among the
    transcript and protein ids of the ``transcript`` table. The alignment is
    saved as a CIGAR string in ``gene_family_member`` and the ungapped
    sequence is written to ``transcript.protein_sequence``. Transcripts
    without a protein id are reported on stdout and skipped.

    :param conn: open sqlite3 connection
    :param i: id of the gene family the alignment belongs to
    :param fname: path of the alignment file in fasta_aln format
    :raises Exception: if a sequence id matches no row or more than one row
    """
    cursor = conn.cursor()
    for record in FASTAReader_gen(fname):
        # Drop the leading '>' and keep only what precedes the first underscore
        seq_id = record.header[1:].partition('_')[0]

        cursor.execute('SELECT transcript_id, protein_id FROM transcript WHERE transcript_id=? OR protein_id=?',
                       (seq_id, seq_id))
        matches = cursor.fetchall()
        if not matches:
            raise Exception("Sequence id '%s' could not be found among the transcript and protein ids" % seq_id)
        if len(matches) > 1:
            raise Exception("Searching sequence id '%s' among the transcript and protein ids returned multiple results" % seq_id)

        transcript_id, protein_id = matches[0]
        if protein_id is None:
            print("Skipping transcript '%s' with no protein id" % transcript_id)
            continue

        aligned_sequence = record.sequence
        cursor.execute('INSERT INTO gene_family_member (gene_family_id, protein_id, protein_alignment) VALUES (?, ?, ?)',
                       (i, protein_id, fasta_aln2cigar(aligned_sequence)))
        # The stored protein sequence is the alignment with the gaps removed
        cursor.execute('UPDATE transcript SET protein_sequence=? WHERE protein_id=?',
                       (aligned_sequence.replace('-', ''), protein_id))
    conn.commit()
0
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
100 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
101 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def newicktree_to_db(conn, i, fname):
    """
    Store a gene tree file as a single-line string in ``gene_family``.

    :param conn: open sqlite3 connection
    :param i: id to use for the new ``gene_family`` row
    :param fname: path of the gene tree file
    """
    with open(fname) as f:
        # Join the tree onto one line. Carriage returns are removed as well,
        # so CRLF files read without newline translation leave no stray '\r'.
        tree = f.read().replace('\r', '').replace('\n', '')

    cur = conn.cursor()
    cur.execute('INSERT INTO gene_family (gene_family_id, gene_tree) VALUES (?, ?)',
                (i, tree))
    conn.commit()
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
110 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
111 |
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
earlhaminst
parents:
diff
changeset
|
def __main__():
    """
    Command-line entry point.

    Copies the gene features SQLite database given with ``-g`` to the output
    path given with ``-o`` (unless they are the same path), adds the gene
    family tables to it, then loads each gene tree (``-t``) together with the
    alignment (``-a``) given at the same position. Gene family ids are
    assigned from 1 in the order the options were given.

    :raises Exception: on positional arguments, on a missing ``-g`` or ``-o``
        option, or when the numbers of ``-t`` and ``-a`` options differ
    """
    parser = optparse.OptionParser()
    parser.add_option('-t', '--tree', action='append', help='Gene tree files')
    parser.add_option('-a', '--align', action='append', help='Protein alignments in fasta_aln format')
    parser.add_option('-g', '--gene', help='Gene features file in SQLite format')
    parser.add_option('-o', '--output', help='Path of the output file')
    options, args = parser.parse_args()
    if args:
        raise Exception('Use options to provide inputs')
    if not options.gene:
        raise Exception('The gene features file must be provided with -g/--gene')
    if not options.output:
        raise Exception('The output path must be provided with -o/--output')

    # optparse leaves 'append' options as None when they are never given
    trees = options.tree or []
    aligns = options.align or []
    if len(trees) != len(aligns):
        # zip() would otherwise silently drop the unpaired files
        raise Exception('The number of gene trees (%d) and of protein alignments (%d) must be the same' % (len(trees), len(aligns)))

    if options.gene != options.output:
        shutil.copyfile(options.gene, options.output)

    conn = sqlite3.connect(options.output)
    try:
        conn.execute('PRAGMA foreign_keys = ON')
        create_tables(conn)

        for i, (tree, align) in enumerate(zip(trees, aligns), start=1):
            newicktree_to_db(conn, i, tree)
            align_to_db(conn, i, align)
    finally:
        # Close the connection even when loading fails part-way
        conn.close()


if __name__ == '__main__':
    __main__()