Mercurial > repos > earlhaminst > gafa
annotate GAFA.py @ 10:c22276db4025 draft default tip
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/main/tools/GAFA/ commit a0d7ab86b86bb764e457767bf8e8bc29868d0cbb
| author | earlhaminst | 
|---|---|
| date | Mon, 10 Mar 2025 23:27:17 +0000 | 
| parents | ab5611663f32 | 
| children | 
| rev | line source | 
|---|---|
| 
1
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
1 import collections | 
| 
0
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
2 import optparse | 
| 
1
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
3 import re | 
| 
4
 
117fc7414307
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 651fae48371f845578753052c6fe173e3bb35670
 
earlhaminst 
parents: 
3 
diff
changeset
 | 
4 import shutil | 
| 
0
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
5 import sqlite3 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
6 | 
| 
4
 
117fc7414307
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 651fae48371f845578753052c6fe173e3bb35670
 
earlhaminst 
parents: 
3 
diff
changeset
 | 
# Version string that create_tables() writes into the ``meta`` table of the
# database it extends.
version = "0.3.0"
# Versions of the input database's ``meta`` table that create_tables() accepts;
# any other value makes it raise.
compatible_version = ['0.3.0', '0.4.0']
| 
1
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
9 | 
| 
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
# One FASTA record: the full header line (including the leading '>') and the
# sequence with all of its lines joined together.
Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])


def FASTAReader_gen(fasta_filename):
    """Lazily yield a ``Sequence`` for each record of a FASTA file.

    The header is the record's first line with trailing whitespace removed
    (the leading '>' is kept). The sequence is the concatenation of every
    following line, each stripped of trailing whitespace, up to the next
    line starting with '>' or the end of the file.

    :param fasta_filename: path of the FASTA file to read
    :raises AssertionError: if a record does not start with a '>' header line
    """
    with open(fasta_filename) as fasta_file:
        line = fasta_file.readline()
        while line:
            if not line.startswith('>'):
                # Raised explicitly instead of using ``assert`` so that the
                # check is not stripped when Python runs with -O; the
                # exception type is kept for backward compatibility.
                raise AssertionError("FASTA headers must start with >")
            header = line.rstrip()
            sequence_parts = []
            line = fasta_file.readline()
            while line and line[0] != '>':
                sequence_parts.append(line.rstrip())
                line = fasta_file.readline()
            yield Sequence(header, "".join(sequence_parts))
| 
1
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
28 | 
| 
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
29 | 
| 
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
# Matches any single character that is not an alignment gap ('-').
FASTA_MATCH_RE = re.compile(r'[^-]')
# Matches one maximal run of identical operations (all 'M' or all 'D').
_CIGAR_RUN_RE = re.compile(r'M+|D+')


def fasta_aln2cigar(sequence):
    """Convert an aligned sequence into a compact CIGAR-like string.

    Every non-gap character counts as a match ('M') and every '-' as a
    deletion ('D'). Consecutive identical operations are condensed into
    ``<count><op>``; the count is omitted for runs of length one, e.g.
    ``'--AB-C'`` becomes ``'2D2MDM'``.

    :param sequence: aligned sequence string using '-' for gaps
    :return: the condensed string; ``''`` for an empty sequence
    """
    # Converts each match into M and each gap into D
    ops = FASTA_MATCH_RE.sub('M', sequence).replace('-', 'D')
    # Condense each run, e.g. DDDD in 4D, and concatenate them again.
    # findall() yields nothing for an empty input, so '' is returned
    # instead of failing on an empty run.
    parts = []
    for run in _CIGAR_RUN_RE.findall(ops):
        if len(run) > 1:
            parts.append(str(len(run)))
        parts.append(run[0])
    return ''.join(parts)
| 
0
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
48 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
49 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
def create_tables(conn):
    """Check the database version, stamp the new one and add the GAFA tables.

    Reads ``version`` from the ``meta`` table, verifies it is listed in
    ``compatible_version``, overwrites it with ``version`` and creates the
    ``gene_family`` and ``gene_family_member`` tables, then commits.

    :param conn: open DB-API connection to the database to extend
    :raises Exception: if the ``meta`` table holds no row, or if its version
        is not one of ``compatible_version``
    """
    cur = conn.cursor()
    # Check that the version of the input database is compatible
    cur.execute('SELECT version FROM meta')
    result = cur.fetchone()
    if result is None:
        # fetchone() returns None for an empty result set; fail with a clear
        # message instead of a TypeError when indexing it.
        raise Exception("No version found in the meta table of the input database")
    input_meta_version = result[0]
    if input_meta_version not in compatible_version:
        raise Exception("Incompatible input meta version '%s'" % input_meta_version)
    cur.execute('UPDATE meta SET version=?',
                (version, ))

    cur.execute('''CREATE TABLE gene_family (
        gene_family_id INTEGER PRIMARY KEY,
        gene_tree VARCHAR NOT NULL)''')

    cur.execute('''CREATE TABLE gene_family_member (
        gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id),
        protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id),
        protein_alignment VARCHAR NOT NULL,
        PRIMARY KEY (gene_family_id, protein_id))''')

    conn.commit()
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
72 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
73 | 
| 
1
 
fc8ca4ade638
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
 
earlhaminst 
parents: 
0 
diff
changeset
 | 
def align_to_db(conn, i, fname):
    """Store the members of one aligned gene family in the database.

    For each record of the FASTA alignment ``fname``, the sequence id (the
    header without '>' and without anything from the first underscore on) is
    looked up among the transcript and protein ids of the ``transcript``
    table. When the matching transcript has a protein id, the alignment is
    saved as a CIGAR string in ``gene_family_member`` and the ungapped
    sequence is written to ``transcript.protein_sequence``.

    :param conn: open DB-API connection
    :param i: id of the gene family the alignment belongs to
    :param fname: path of the alignment in fasta_aln format
    :raises Exception: if a sequence id matches no transcript or more than one
    """
    cursor = conn.cursor()
    for record in FASTAReader_gen(fname):
        # Trim seq_id by removing everything from the first underscore
        seq_id = record.header[1:].split('_', 1)[0]

        cursor.execute('SELECT transcript_id, protein_id FROM transcript WHERE transcript_id=? OR protein_id=?',
                       (seq_id, seq_id))
        matches = cursor.fetchall()
        if not matches:
            raise Exception("Sequence id '%s' could not be found among the transcript and protein ids" % seq_id)
        if len(matches) > 1:
            raise Exception("Searching sequence id '%s' among the transcript and protein ids returned multiple results" % seq_id)

        transcript_id, protein_id = matches[0]
        if protein_id is None:
            print("Skipping transcript '%s' with no protein id" % transcript_id)
            continue

        aligned = record.sequence
        cursor.execute('INSERT INTO gene_family_member (gene_family_id, protein_id, protein_alignment) VALUES (?, ?, ?)',
                       (i, protein_id, fasta_aln2cigar(aligned)))
        # The stored protein sequence is the alignment row without its gaps.
        cursor.execute('UPDATE transcript SET protein_sequence=? WHERE protein_id=?', (aligned.replace('-', ''), protein_id))
    # NOTE(review): a single commit once the whole alignment is processed;
    # confirm this matches the intended transaction scope.
    conn.commit()
| 
0
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
98 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
99 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
def newicktree_to_db(conn, i, fname):
    """Insert the gene tree stored in ``fname`` as gene family ``i``.

    The file content is stored in ``gene_family.gene_tree`` with every
    newline character removed, and the insert is committed.

    :param conn: open DB-API connection with a ``gene_family`` table
    :param i: id to use as ``gene_family_id``
    :param fname: path of the gene tree file
    """
    with open(fname) as tree_file:
        # Drop the line breaks so the tree is saved as a single line.
        newick = ''.join(tree_file.read().split('\n'))

    insert_sql = 'INSERT INTO gene_family (gene_family_id, gene_tree) VALUES (?, ?)'
    conn.cursor().execute(insert_sql, (i, newick))
    conn.commit()
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
108 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
109 | 
| 
 
af9f72ddf7f9
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
 
earlhaminst 
parents:  
diff
changeset
 | 
def __main__():
    """Command-line entry point: add gene families to a gene features database.

    Copies the SQLite file given with ``--gene`` to ``--output`` (unless both
    are the same path), creates the gene family tables in it and loads each
    ``--tree`` / ``--align`` pair, in the order given, as gene family
    1, 2, 3, ...

    :raises Exception: if positional arguments are passed, if ``--gene`` or
        ``--output`` is missing, or if the numbers of ``--tree`` and
        ``--align`` options differ
    """
    parser = optparse.OptionParser()
    parser.add_option('-t', '--tree', action='append', help='Gene tree files')
    parser.add_option('-a', '--align', action='append', help='Protein alignments in fasta_aln format')
    parser.add_option('-g', '--gene', help='Gene features file in SQLite format')
    parser.add_option('-o', '--output', help='Path of the output file')
    options, args = parser.parse_args()
    if args:
        raise Exception('Use options to provide inputs')
    if not options.gene:
        raise Exception('The gene features file must be provided with --gene')
    if not options.output:
        raise Exception('The output file must be provided with --output')

    # optparse leaves 'append' options as None when they are never given.
    trees = options.tree or []
    aligns = options.align or []
    if len(trees) != len(aligns):
        # zip() would silently drop the unpaired files.
        raise Exception('The number of gene trees (%d) and of alignments (%d) must be the same' % (len(trees), len(aligns)))

    if options.gene != options.output:
        shutil.copyfile(options.gene, options.output)

    conn = sqlite3.connect(options.output)
    try:
        conn.execute('PRAGMA foreign_keys = ON')
        create_tables(conn)

        for i, (tree, align) in enumerate(zip(trees, aligns), start=1):
            newicktree_to_db(conn, i, tree)
            align_to_db(conn, i, align)
    finally:
        # Release the database file even when loading fails.
        conn.close()


if __name__ == '__main__':
    __main__()
