Mercurial > repos > iuc > decontaminator
annotate predict.py @ 0:b856d3d95413 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
author | iuc |
---|---|
date | Mon, 09 Jan 2023 13:27:09 +0000 |
parents | |
children |
rev | line source |
---|---|
0
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
3 # Credits: Grigorii Sukhorukov, Macha Nikolski |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
4 import argparse |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
5 import os |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
6 from pathlib import Path |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
7 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
8 import numpy as np |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
9 import pandas as pd |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
10 from Bio import SeqIO |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
11 from models import model_10 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
12 from utils import preprocess as pp |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
13 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
# TensorFlow runtime settings, passed through environment variables.
# CUDA_VISIBLE_DEVICES is set to an empty string (no GPU is listed) and
# TF_XLA_FLAGS carries the --tf_xla_cpu_global_jit switch.
# TF_CPP_MIN_LOG_LEVEL controls how much TensorFlow logging is printed:
#   0 - everything is printed
#   1 - I messages are not printed
#   2 - I and W messages are not printed
#   3 - nothing is printed
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "",
    "TF_XLA_FLAGS": "--tf_xla_cpu_global_jit",
    'TF_CPP_MIN_LOG_LEVEL': '3',
})
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
22 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
23 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
def predict_nn(ds_path, nn_weights_path, length, batch_size=256):
    """
    Breaks down contigs into fragments
    and uses pretrained neural networks to give predictions for fragments

    ds_path: path to the fasta file with contigs
    nn_weights_path: folder that contains the weights file model_{length}.h5
    length: fragment length; it also selects which weights file is loaded
    batch_size: batch size passed on to model.predict

    Returns a pandas DataFrame with one row per fragment and the columns
    id, length, fragment, pred_vir, pred_other and NN_decision.

    Raises Exception when ds_path does not exist, and ValueError when the
    file contains no sequences or when no fragment could be produced.
    """
    try:
        seqs_ = list(SeqIO.parse(ds_path, "fasta"))
    except FileNotFoundError:
        raise Exception("test dataset was not found. Change ds variable")
    if not seqs_:
        raise ValueError("All sequences were smaller than length of the model")

    out_table = {
        "id": [],
        "length": [],
        "fragment": [],
        "pred_vir": [],
        "pred_other": [],
    }
    test_fragments = []
    test_fragments_rc = []
    for seq in seqs_:
        # One contig at a time, so every fragment can be traced back to it.
        fragments_, fragments_rc, _ = pp.fragmenting(
            [seq],
            length,
            max_gap=0.8,
            sl_wind_step=int(length / 2),
        )
        test_fragments.extend(fragments_)
        test_fragments_rc.extend(fragments_rc)
        n_fragments = len(fragments_)
        out_table["id"].extend([seq.id] * n_fragments)
        out_table["length"].extend([len(seq.seq)] * n_fragments)
        out_table["fragment"].extend(range(n_fragments))

    # Fragmenting may produce nothing even for a non-empty fasta file.
    # Stop here with a clear message instead of building the model and
    # asking it to predict on empty input.
    if not test_fragments:
        raise ValueError(
            f"No fragments of length {length} could be extracted "
            "from the input sequences"
        )

    test_encoded = pp.one_hot_encode(test_fragments)
    test_encoded_rc = pp.one_hot_encode(test_fragments_rc)
    model = model_10.model(length)
    model.load_weights(Path(nn_weights_path, f"model_{length}.h5"))
    prediction = model.predict([test_encoded, test_encoded_rc], batch_size)
    # Output column 1 is stored as the viral score, column 0 as the other score.
    out_table['pred_vir'].extend(list(prediction[..., 1]))
    out_table['pred_other'].extend(list(prediction[..., 0]))
    print('Exporting predictions to csv file')
    df = pd.DataFrame(out_table)
    df['NN_decision'] = np.where(df['pred_vir'] > df['pred_other'], 'virus', 'other')
    return df
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
70 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
71 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
def predict_test(ds_path, length):
    """
    Stand-in for the neural network prediction, use only for testing!

    Contigs from the fasta file ds_path are broken down into fragments
    of the given length, and every fragment is labelled as viral
    (pred_vir = 1, pred_other = 0, NN_decision = 'virus').
    Returns a DataFrame with one row per fragment.
    """
    try:
        records = list(SeqIO.parse(ds_path, "fasta"))
    except FileNotFoundError:
        raise Exception("test dataset was not found. Change ds variable")
    if not records:
        raise ValueError("All sequences were smaller than length of the model")

    contig_ids = []
    contig_lengths = []
    fragment_numbers = []
    for record in records:
        pieces, _pieces_rc, _ = pp.fragmenting(
            [record],
            length,
            max_gap=0.8,
            sl_wind_step=int(length / 2),
        )
        # Only the number of fragments matters here, not their content.
        n_pieces = len(pieces)
        for number in range(n_pieces):
            contig_ids.append(record.id)
            contig_lengths.append(len(record.seq))
            fragment_numbers.append(number)

    print('Exporting predictions to tsv file')
    table = pd.DataFrame(
        {
            "id": contig_ids,
            "length": contig_lengths,
            "fragment": fragment_numbers,
        }
    )
    return table.assign(pred_vir=1, pred_other=0, NN_decision='virus')
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
108 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
109 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
def predict_contigs(df):
    """
    Based on the per-fragment predictions (a table with one row per
    fragment, as built by predict_nn or predict_test)
    gives a final prediction for the whole contig

    df: DataFrame with at least the columns id, length and NN_decision,
        where NN_decision is 'virus' or 'other'

    Returns a DataFrame with one row per contig and the columns
    length, id, '# viral fragments', '# other fragments', decision,
    '# viral / # total' and '# viral / # total * length',
    sorted by the last column in descending order.
    A contig is called 'virus' when it has at least as many viral
    fragments as other fragments.
    """
    counts = (
        df.groupby(["id", "length", 'NN_decision'], sort=False)
        .size()
        .unstack(fill_value=0)
        .reset_index()
    )
    # A decision that never occurs has no column after unstack. Create it
    # directly as an integer zero count: reindexing first and filling the
    # NaN afterwards would turn that column into floats (0.0).
    counts = counts.reindex(
        ['length', 'id', 'virus', 'other'],
        axis=1,
        fill_value=0,
    )
    counts['decision'] = np.where(counts['virus'] >= counts['other'], 'virus', 'other')
    counts = counts.sort_values(by='length', ascending=False)
    counts = counts.loc[:, ['length', 'id', 'virus', 'other', 'decision']]
    counts = counts.rename(
        columns={
            'virus': '# viral fragments',
            'other': '# other fragments',
        }
    )
    n_viral = counts['# viral fragments']
    n_total = n_viral + counts['# other fragments']
    counts['# viral / # total'] = (n_viral / n_total).round(3)
    counts['# viral / # total * length'] = counts['# viral / # total'] * counts['length']
    return counts.sort_values(by='# viral / # total * length', ascending=False)
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
141 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
142 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
def predict(test_ds, weights, out_path, return_viral=True):
    """Filter out contaminant contigs from the fasta file.

    For every input file two prediction runs are made (fragment lengths 500
    and 1000). Rows from the 500 run are kept where ``length`` < 1500 and
    rows from the 1000 run where ``length`` >= 1500; the merged tables are
    written to ``predicted_fragments.tsv`` (per fragment) and
    ``predicted.tsv`` (per contig) inside ``out_path``. The output file names
    are fixed, so when several inputs are given each one overwrites the
    files produced for the previous one.

    test_ds: path to the input file with
        contigs in fasta format (str or list of str)
    weights: path to the folder containing weights
        for NN and RF modules trained on 500 and 1000 fragment lengths (str)
    out_path: path to the folder to store predictions (str)
    return_viral: whether to return contigs annotated as
        viral in separate fasta file (True/False)

    Raises ValueError when test_ds is neither a str nor a non-empty list,
    and AssertionError when any of the input files does not exist.
    """
    if isinstance(test_ds, str):
        test_ds = [test_ds]
    if not isinstance(test_ds, list) or not test_ds:
        raise ValueError('test_ds was incorrectly assigned in the config file')

    # Check every input before any work is done, so that a bad path in the
    # middle of the list does not abort the run after outputs were written.
    for ds_path in test_ds:
        assert Path(ds_path).exists(), f'{ds_path} does not exist'
    # assert Path(weights).exists(), f'{weights} does not exist'
    limit = 0  # lower bound on 'length' for rows taken from the 500 run
    Path(out_path).mkdir(parents=True, exist_ok=True)

    def merge_by_length(frames):
        # frames[0] comes from the 500 run, frames[1] from the 1000 run.
        short_df, long_df = frames
        short_df = short_df[(short_df['length'] >= limit) & (short_df['length'] < 1500)]
        long_df = long_df[long_df['length'] >= 1500]
        return pd.concat([long_df, short_df], ignore_index=True)

    # parameter to activate test function. Only for debugging on github
    # test is launched when the weights directory is empty
    use_test_f = not Path(weights, 'model_1000.h5').exists()
    for ts in test_ds:
        dfs_fr = []
        dfs_cont = []
        for l_ in 500, 1000:
            print(f'starting prediction for {Path(ts).name} '
                  f'for fragment length {l_}')
            if use_test_f:
                df = predict_test(ds_path=ts, length=l_, )
            else:
                df = predict_nn(
                    ds_path=ts,
                    nn_weights_path=weights,
                    length=l_,
                )
            df = df.round(3)
            dfs_fr.append(df)
            df = predict_contigs(df)
            dfs_cont.append(df)
            print('prediction finished')

        pred_fr = Path(out_path, "predicted_fragments.tsv")
        merge_by_length(dfs_fr).to_csv(pred_fr, sep='\t')

        df = merge_by_length(dfs_cont)
        pred_contigs = Path(out_path, "predicted.tsv")
        df.to_csv(pred_contigs, sep='\t')

        if return_viral:
            # A set gives O(1) membership tests while streaming the records.
            viral_ids = set(df[df["decision"] == "virus"]["id"])
            viral_seqs = (
                s_ for s_ in SeqIO.parse(ts, "fasta") if s_.id in viral_ids
            )
            SeqIO.write(viral_seqs, Path(out_path, "viral.fasta"), 'fasta')
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
213 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
214 |
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff
changeset
|
def str2bool(value):
    """Convert a command-line value such as "True" or "False" to a bool.

    argparse hands option values over as strings, and any non-empty string
    (including "False") is truthy, so the text has to be interpreted
    explicitly. Raises argparse.ArgumentTypeError for unrecognised text.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(
        f'expected True or False, got {value!r}')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # The three path options are required: without them predict() cannot be
    # called, and a usage error is clearer than a NameError further down.
    parser.add_argument("--test_ds", required=True,
                        help="path to the input "
                             "file with contigs "
                             "in fasta format "
                             "(str or list of str)")
    parser.add_argument("--weights", required=True,
                        help="path to the folder containing "
                             "weights for NN and RF modules "
                             "trained on 500 and 1000 "
                             "fragment lengths (str)")
    parser.add_argument("--out_path", required=True,
                        help="path to the folder to store "
                             "predictions (str)")
    # Defaults to True, the same default predict() uses for return_viral.
    parser.add_argument("--return_viral", type=str2bool, default=True,
                        help="whether to return "
                             "contigs annotated "
                             "as viral in separate "
                             "fasta file (True/False)")

    args = parser.parse_args()
    predict(args.test_ds, args.weights, args.out_path, args.return_viral)