Mercurial > repos > galaxy-australia > alphafold2
annotate docker/alphafold/alphafold/data/pipeline_multimer.py @ 1:6c92e000d684 draft
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
author | galaxy-australia |
---|---|
date | Tue, 01 Mar 2022 02:53:05 +0000 |
parents | |
children |
rev | line source |
---|---|
1
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
1 # Copyright 2021 DeepMind Technologies Limited |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
2 # |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
3 # Licensed under the Apache License, Version 2.0 (the "License"); |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
4 # you may not use this file except in compliance with the License. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
5 # You may obtain a copy of the License at |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
6 # |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
7 # http://www.apache.org/licenses/LICENSE-2.0 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
8 # |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
9 # Unless required by applicable law or agreed to in writing, software |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
10 # distributed under the License is distributed on an "AS IS" BASIS, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
12 # See the License for the specific language governing permissions and |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
13 # limitations under the License. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
14 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
15 """Functions for building the features for the AlphaFold multimer model.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
16 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
17 import collections |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
18 import contextlib |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
19 import copy |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
20 import dataclasses |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
21 import json |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
22 import os |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
23 import tempfile |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
24 from typing import Mapping, MutableMapping, Sequence |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
25 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
26 from absl import logging |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
27 from alphafold.common import protein |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
28 from alphafold.common import residue_constants |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
29 from alphafold.data import feature_processing |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
30 from alphafold.data import msa_pairing |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
31 from alphafold.data import parsers |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
32 from alphafold.data import pipeline |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
33 from alphafold.data.tools import jackhmmer |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
34 import numpy as np |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
35 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
36 # Internal import (7716). |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
37 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
38 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
39 @dataclasses.dataclass(frozen=True) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
40 class _FastaChain: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
41 sequence: str |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
42 description: str |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
43 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
44 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
45 def _make_chain_id_map(*, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
46 sequences: Sequence[str], |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
47 descriptions: Sequence[str], |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
48 ) -> Mapping[str, _FastaChain]: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
49 """Makes a mapping from PDB-format chain ID to sequence and description.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
50 if len(sequences) != len(descriptions): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
51 raise ValueError('sequences and descriptions must have equal length. ' |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
52 f'Got {len(sequences)} != {len(descriptions)}.') |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
53 if len(sequences) > protein.PDB_MAX_CHAINS: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
54 raise ValueError('Cannot process more chains than the PDB format supports. ' |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
55 f'Got {len(sequences)} chains.') |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
56 chain_id_map = {} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
57 for chain_id, sequence, description in zip( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
58 protein.PDB_CHAIN_IDS, sequences, descriptions): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
59 chain_id_map[chain_id] = _FastaChain( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
60 sequence=sequence, description=description) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
61 return chain_id_map |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
62 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
63 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
64 @contextlib.contextmanager |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
65 def temp_fasta_file(fasta_str: str): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
66 with tempfile.NamedTemporaryFile('w', suffix='.fasta') as fasta_file: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
67 fasta_file.write(fasta_str) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
68 fasta_file.seek(0) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
69 yield fasta_file.name |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
70 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
71 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
72 def convert_monomer_features( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
73 monomer_features: pipeline.FeatureDict, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
74 chain_id: str) -> pipeline.FeatureDict: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
75 """Reshapes and modifies monomer features for multimer models.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
76 converted = {} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
77 converted['auth_chain_id'] = np.asarray(chain_id, dtype=np.object_) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
78 unnecessary_leading_dim_feats = { |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
79 'sequence', 'domain_name', 'num_alignments', 'seq_length'} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
80 for feature_name, feature in monomer_features.items(): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
81 if feature_name in unnecessary_leading_dim_feats: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
82 # asarray ensures it's a np.ndarray. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
83 feature = np.asarray(feature[0], dtype=feature.dtype) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
84 elif feature_name == 'aatype': |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
85 # The multimer model performs the one-hot operation itself. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
86 feature = np.argmax(feature, axis=-1).astype(np.int32) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
87 elif feature_name == 'template_aatype': |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
88 feature = np.argmax(feature, axis=-1).astype(np.int32) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
89 new_order_list = residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
90 feature = np.take(new_order_list, feature.astype(np.int32), axis=0) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
91 elif feature_name == 'template_all_atom_masks': |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
92 feature_name = 'template_all_atom_mask' |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
93 converted[feature_name] = feature |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
94 return converted |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
95 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
96 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
97 def int_id_to_str_id(num: int) -> str: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
98 """Encodes a number as a string, using reverse spreadsheet style naming. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
99 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
100 Args: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
101 num: A positive integer. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
102 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
103 Returns: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
104 A string that encodes the positive integer using reverse spreadsheet style, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
105 naming e.g. 1 = A, 2 = B, ..., 27 = AA, 28 = BA, 29 = CA, ... This is the |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
106 usual way to encode chain IDs in mmCIF files. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
107 """ |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
108 if num <= 0: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
109 raise ValueError(f'Only positive integers allowed, got {num}.') |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
110 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
111 num = num - 1 # 1-based indexing. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
112 output = [] |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
113 while num >= 0: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
114 output.append(chr(num % 26 + ord('A'))) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
115 num = num // 26 - 1 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
116 return ''.join(output) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
117 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
118 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
119 def add_assembly_features( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
120 all_chain_features: MutableMapping[str, pipeline.FeatureDict], |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
121 ) -> MutableMapping[str, pipeline.FeatureDict]: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
122 """Add features to distinguish between chains. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
123 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
124 Args: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
125 all_chain_features: A dictionary which maps chain_id to a dictionary of |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
126 features for each chain. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
127 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
128 Returns: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
129 all_chain_features: A dictionary which maps strings of the form |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
130 `<seq_id>_<sym_id>` to the corresponding chain features. E.g. two |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
131 chains from a homodimer would have keys A_1 and A_2. Two chains from a |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
132 heterodimer would have keys A_1 and B_1. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
133 """ |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
134 # Group the chains by sequence |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
135 seq_to_entity_id = {} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
136 grouped_chains = collections.defaultdict(list) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
137 for chain_id, chain_features in all_chain_features.items(): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
138 seq = str(chain_features['sequence']) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
139 if seq not in seq_to_entity_id: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
140 seq_to_entity_id[seq] = len(seq_to_entity_id) + 1 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
141 grouped_chains[seq_to_entity_id[seq]].append(chain_features) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
142 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
143 new_all_chain_features = {} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
144 chain_id = 1 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
145 for entity_id, group_chain_features in grouped_chains.items(): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
146 for sym_id, chain_features in enumerate(group_chain_features, start=1): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
147 new_all_chain_features[ |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
148 f'{int_id_to_str_id(entity_id)}_{sym_id}'] = chain_features |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
149 seq_length = chain_features['seq_length'] |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
150 chain_features['asym_id'] = chain_id * np.ones(seq_length) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
151 chain_features['sym_id'] = sym_id * np.ones(seq_length) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
152 chain_features['entity_id'] = entity_id * np.ones(seq_length) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
153 chain_id += 1 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
154 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
155 return new_all_chain_features |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
156 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
157 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
158 def pad_msa(np_example, min_num_seq): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
159 np_example = dict(np_example) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
160 num_seq = np_example['msa'].shape[0] |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
161 if num_seq < min_num_seq: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
162 for feat in ('msa', 'deletion_matrix', 'bert_mask', 'msa_mask'): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
163 np_example[feat] = np.pad( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
164 np_example[feat], ((0, min_num_seq - num_seq), (0, 0))) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
165 np_example['cluster_bias_mask'] = np.pad( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
166 np_example['cluster_bias_mask'], ((0, min_num_seq - num_seq),)) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
167 return np_example |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
168 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
169 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
170 class DataPipeline: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
171 """Runs the alignment tools and assembles the input features.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
172 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
173 def __init__(self, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
174 monomer_data_pipeline: pipeline.DataPipeline, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
175 jackhmmer_binary_path: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
176 uniprot_database_path: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
177 max_uniprot_hits: int = 50000, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
178 use_precomputed_msas: bool = False): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
179 """Initializes the data pipeline. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
180 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
181 Args: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
182 monomer_data_pipeline: An instance of pipeline.DataPipeline - that runs |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
183 the data pipeline for the monomer AlphaFold system. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
184 jackhmmer_binary_path: Location of the jackhmmer binary. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
185 uniprot_database_path: Location of the unclustered uniprot sequences, that |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
186 will be searched with jackhmmer and used for MSA pairing. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
187 max_uniprot_hits: The maximum number of hits to return from uniprot. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
188 use_precomputed_msas: Whether to use pre-existing MSAs; see run_alphafold. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
189 """ |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
190 self._monomer_data_pipeline = monomer_data_pipeline |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
191 self._uniprot_msa_runner = jackhmmer.Jackhmmer( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
192 binary_path=jackhmmer_binary_path, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
193 database_path=uniprot_database_path) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
194 self._max_uniprot_hits = max_uniprot_hits |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
195 self.use_precomputed_msas = use_precomputed_msas |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
196 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
197 def _process_single_chain( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
198 self, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
199 chain_id: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
200 sequence: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
201 description: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
202 msa_output_dir: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
203 is_homomer_or_monomer: bool) -> pipeline.FeatureDict: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
204 """Runs the monomer pipeline on a single chain.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
205 chain_fasta_str = f'>chain_{chain_id}\n{sequence}\n' |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
206 chain_msa_output_dir = os.path.join(msa_output_dir, chain_id) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
207 if not os.path.exists(chain_msa_output_dir): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
208 os.makedirs(chain_msa_output_dir) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
209 with temp_fasta_file(chain_fasta_str) as chain_fasta_path: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
210 logging.info('Running monomer pipeline on chain %s: %s', |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
211 chain_id, description) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
212 chain_features = self._monomer_data_pipeline.process( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
213 input_fasta_path=chain_fasta_path, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
214 msa_output_dir=chain_msa_output_dir) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
215 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
216 # We only construct the pairing features if there are 2 or more unique |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
217 # sequences. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
218 if not is_homomer_or_monomer: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
219 all_seq_msa_features = self._all_seq_msa_features(chain_fasta_path, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
220 chain_msa_output_dir) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
221 chain_features.update(all_seq_msa_features) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
222 return chain_features |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
223 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
224 def _all_seq_msa_features(self, input_fasta_path, msa_output_dir): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
225 """Get MSA features for unclustered uniprot, for pairing.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
226 out_path = os.path.join(msa_output_dir, 'uniprot_hits.sto') |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
227 result = pipeline.run_msa_tool( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
228 self._uniprot_msa_runner, input_fasta_path, out_path, 'sto', |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
229 self.use_precomputed_msas) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
230 msa = parsers.parse_stockholm(result['sto']) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
231 msa = msa.truncate(max_seqs=self._max_uniprot_hits) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
232 all_seq_features = pipeline.make_msa_features([msa]) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
233 valid_feats = msa_pairing.MSA_FEATURES + ( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
234 'msa_uniprot_accession_identifiers', |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
235 'msa_species_identifiers', |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
236 ) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
237 feats = {f'{k}_all_seq': v for k, v in all_seq_features.items() |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
238 if k in valid_feats} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
239 return feats |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
240 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
241 def process(self, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
242 input_fasta_path: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
243 msa_output_dir: str, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
244 is_prokaryote: bool = False) -> pipeline.FeatureDict: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
245 """Runs alignment tools on the input sequences and creates features.""" |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
246 with open(input_fasta_path) as f: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
247 input_fasta_str = f.read() |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
248 input_seqs, input_descs = parsers.parse_fasta(input_fasta_str) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
249 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
250 chain_id_map = _make_chain_id_map(sequences=input_seqs, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
251 descriptions=input_descs) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
252 chain_id_map_path = os.path.join(msa_output_dir, 'chain_id_map.json') |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
253 with open(chain_id_map_path, 'w') as f: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
254 chain_id_map_dict = {chain_id: dataclasses.asdict(fasta_chain) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
255 for chain_id, fasta_chain in chain_id_map.items()} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
256 json.dump(chain_id_map_dict, f, indent=4, sort_keys=True) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
257 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
258 all_chain_features = {} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
259 sequence_features = {} |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
260 is_homomer_or_monomer = len(set(input_seqs)) == 1 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
261 for chain_id, fasta_chain in chain_id_map.items(): |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
262 if fasta_chain.sequence in sequence_features: |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
263 all_chain_features[chain_id] = copy.deepcopy( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
264 sequence_features[fasta_chain.sequence]) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
265 continue |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
266 chain_features = self._process_single_chain( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
267 chain_id=chain_id, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
268 sequence=fasta_chain.sequence, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
269 description=fasta_chain.description, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
270 msa_output_dir=msa_output_dir, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
271 is_homomer_or_monomer=is_homomer_or_monomer) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
272 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
273 chain_features = convert_monomer_features(chain_features, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
274 chain_id=chain_id) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
275 all_chain_features[chain_id] = chain_features |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
276 sequence_features[fasta_chain.sequence] = chain_features |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
277 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
278 all_chain_features = add_assembly_features(all_chain_features) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
279 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
280 np_example = feature_processing.pair_and_merge( |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
281 all_chain_features=all_chain_features, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
282 is_prokaryote=is_prokaryote, |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
283 ) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
284 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
285 # Pad MSA to avoid zero-sized extra_msa. |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
286 np_example = pad_msa(np_example, 512) |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
287 |
6c92e000d684
"planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86"
galaxy-australia
parents:
diff
changeset
|
288 return np_example |