Mercurial > repos > galaxy-australia > alphafold2
annotate scripts/validate_fasta.py @ 22:3f188450ca4f draft default tip
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit d626bb28203543a70d3fc60d662cb054bc3cef7c
author | galaxy-australia |
---|---|
date | Wed, 30 Oct 2024 21:46:34 +0000 |
parents | 2f7702fd0a4c |
children |
rev | line source |
---|---|
16
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
1 """Validate input FASTA sequence.""" |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
2 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
3 import argparse |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
4 import re |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
5 import sys |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
6 from typing import List |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
7 |
19
2f7702fd0a4c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents:
18
diff
changeset
|
# Fallback limit on the number of sequences when the validator is not given
# an explicit maximum.
DEFAULT_MAX_SEQUENCE_COUNT = 10
# Whitespace characters removed from every sequence before it is stored.
STRIP_SEQUENCE_CHARS = list('\n\r\t ')
16
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
10 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
11 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
class Fasta:
    """One FASTA record: a header line plus its sequence string."""

    def __init__(self, header_str: str, seq_str: str):
        """Store the header and the sequence exactly as given."""
        self.header, self.aa_seq = header_str, seq_str
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
16 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
17 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
class FastaLoader:
    """Read a FASTA file, or bare sequence text, into ``Fasta`` records.

    Parsed records are collected in ``self.fastas``. Input containing the
    token ``__cn__`` is treated as pasted text whose newlines and ``>``
    markers were escaped as ``__cn__`` and ``__gt__``; anything else is read
    as a regular file with real newlines and ``>`` header markers.
    """

    def __init__(self, fasta_path: str):
        """Initialize from FASTA file."""
        self.fastas = []
        self.load(fasta_path)

    def load(self, fasta_path: str):
        """Load bare or FASTA formatted sequence.

        Sets ``content``, ``newline``, ``read_caret`` and ``lines`` on the
        instance and appends one ``Fasta`` per non-empty record to
        ``self.fastas``.

        Args:
            fasta_path: Path of the text file to parse.
        """
        with open(fasta_path, 'r') as f:
            self.content = f.read()

        if "__cn__" in self.content:
            # Pasted content with escaped characters
            self.newline = '__cn__'
            self.read_caret = '__gt__'
        else:
            # Uploaded file with normal content
            self.newline = '\n'
            self.read_caret = '>'

        self.lines = self.content.split(self.newline)

        # Classify the input by its first line that has any content, so that
        # leading blank lines do not turn a regular FASTA file into a single
        # "headless" sequence with the header text glued into it.
        first_line = next((ln for ln in self.lines if ln.strip()), '')

        if not first_line.startswith(self.read_caret):
            # Fasta is headless, load as single sequence
            self.update_fastas('', ''.join(self.lines))
            return

        # NOTE(review): in escaped input only ``__cn__`` is interpreted here;
        # any other escape token inside a sequence line is left in place -
        # confirm whether such tokens can occur in sequence text.
        header = None
        chunks = []
        for line in self.lines:
            if line.startswith(self.read_caret):
                if header is not None:
                    # A new header closes the record collected so far.
                    self.update_fastas(header, ''.join(chunks))
                header = '>' + self.strip_header(line)
                chunks = []
            elif header is not None:
                # Lines ahead of the first header are blank and are skipped.
                chunks.append(line)
        self.update_fastas(header, ''.join(chunks))

    def strip_header(self, line):
        """Strip characters escaped with underscores from pasted text.

        Every ``__xx__`` token (any two characters between double
        underscores) is removed, then trailing whitespace (for example the
        carriage return of a CRLF file), then any leading or trailing ``>``.

        Args:
            line: Raw header line.

        Returns:
            The header text without its ``>`` marker.
        """
        return re.sub(r'\_\_.{2}\_\_', '', line).rstrip().strip('>')

    def update_fastas(self, header: str, sequence: str):
        """Append a record to ``self.fastas`` if it has any sequence content.

        Args:
            header: Header line including ``>``; when empty, a generic
                ``>sequence_<n>`` header is generated, where ``n`` is the
                number of records stored so far.
            sequence: Raw sequence text; every character listed in
                ``STRIP_SEQUENCE_CHARS`` is removed before storing.
        """
        if not sequence:
            return

        for char in STRIP_SEQUENCE_CHARS:
            sequence = sequence.replace(char, '')

        # Test for emptiness only after cleaning, so a record made solely of
        # whitespace is dropped instead of being stored as an empty sequence.
        if not sequence:
            return

        # create generic header if not exists
        if not header:
            header = f'>sequence_{len(self.fastas)}'

        # Create new Fasta
        self.fastas.append(Fasta(header, sequence))
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
76 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
77 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
78 class FastaValidator: |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def __init__(
    self,
    min_length=None,
    max_length=None,
    multiple=False,
    max_sequence_count=None,
):
    """Record the validation settings.

    Args:
        min_length: Minimum accepted sequence length; falsy disables the
            check.
        max_length: Maximum accepted sequence length; falsy disables the
            check.
        multiple: Whether several sequences are expected.
        max_sequence_count: Largest number of sequences accepted when
            ``multiple`` is set; a falsy value selects
            ``DEFAULT_MAX_SEQUENCE_COUNT``.
    """
    self.multiple = multiple
    self.min_length = min_length
    self.max_length = max_length
    # Characters accepted in a sequence.
    self.iupac_characters = set('ABCDEFGHIKLMNPQRSTUVWXYZ-')
    self.max_sequence_count = (
        max_sequence_count
        if max_sequence_count
        else DEFAULT_MAX_SEQUENCE_COUNT
    )
16
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
98 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def validate(self, fasta_list: List[Fasta]):
    """Perform FASTA validation.

    Runs the sequence-count, length and alphabet checks in that order and
    returns ``self.fasta_list`` as it stands afterwards.

    Args:
        fasta_list: Records to validate.

    Returns:
        The validated list of records.
    """
    self.fasta_list = fasta_list
    # Sequences containing 'X' are deliberately not rejected here (no
    # validate_x step is run); AlphaFold is left to raise its own error if
    # it does not like them.
    for step in ('validate_num_seqs', 'validate_length', 'validate_alphabet'):
        getattr(self, step)()
    return self.fasta_list
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
109 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def validate_num_seqs(self) -> None:
    """Check the number of sequences against the configured mode.

    With ``self.multiple`` unset, an empty list is an error and any extra
    sequences are dropped (with a warning on stderr) so that only the first
    one remains. With ``self.multiple`` set, at least two and at most
    ``self.max_sequence_count`` sequences are required.

    Raises:
        ValueError: If the number of sequences is not acceptable.
    """
    n_records = len(self.fasta_list)

    if not self.multiple:
        if n_records == 0:
            raise ValueError(
                'Error encountered validating FASTA:\n'
                ' no FASTA sequences detected in input file.')
        if n_records > 1:
            sys.stderr.write(
                'WARNING: More than 1 sequence detected.'
                ' Using first FASTA sequence as input.\n')
            self.fasta_list = self.fasta_list[:1]
        return

    if n_records < 2:
        raise ValueError(
            'Error encountered validating FASTA:\n'
            'Multimer mode requires multiple input sequence.'
            f' Only {n_records} sequences were detected in'
            ' the provided file.')

    if n_records > self.max_sequence_count:
        raise ValueError(
            f'WARNING: detected {n_records} sequences but the'
            f' maximum allowed is {self.max_sequence_count}'
            ' sequences.')
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
138 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def validate_length(self):
    """Confirm whether sequence length is valid.

    A falsy ``self.min_length`` or ``self.max_length`` disables the
    corresponding check.

    Raises:
        ValueError: If the sequence is shorter than ``self.min_length`` or
            longer than ``self.max_length``.
    """
    # NOTE(review): only the first record is length-checked, even when the
    # list holds several sequences - confirm this is intended.
    first = self.fasta_list[0]

    if self.min_length and len(first.aa_seq) < self.min_length:
        raise ValueError(
            'Error encountered validating FASTA:\n Sequence too short'
            f' ({len(first.aa_seq)}AA).'
            f' Minimum length is {self.min_length}AA.')

    if self.max_length and len(first.aa_seq) > self.max_length:
        raise ValueError(
            'Error encountered validating FASTA:\n'
            f' Sequence too long ({len(first.aa_seq)}AA).'
            f' Maximum length is {self.max_length}AA.')
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
154 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def validate_alphabet(self):
    """Confirm that every sequence conforms to IUPAC amino acid codes.

    Each record in ``self.fasta_list`` is upper-cased and compared, character
    by character, against ``self.iupac_characters``. Checking every record
    (not just the first) matters when several sequences are submitted
    together: an invalid residue in a later sequence must not slip through.

    An empty ``self.fasta_list`` passes trivially; the sequence count is
    presumably enforced by a separate check - TODO confirm.

    Raises:
        ValueError: On the first character that is not an accepted code.
            The message reports the offending character and its zero-based
            position; when more than one sequence is present it also names
            the one-based index of the offending sequence.
    """
    several = len(self.fasta_list) > 1
    for number, fasta in enumerate(self.fasta_list, start=1):
        for i, char in enumerate(fasta.aa_seq.upper()):
            if char in self.iupac_characters:
                continue
            # Only name the record when there is a choice, so the
            # single-sequence message stays exactly as it was.
            suffix = f' (sequence {number})' if several else ''
            raise ValueError(
                'Error encountered validating FASTA:\n Invalid amino acid'
                f' found at pos {i}: "{char}"{suffix}')
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
166 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def validate_x(self):
    """Reject sequences containing the unknown-residue code "X".

    Every record in ``self.fasta_list`` is checked case-insensitively, so a
    lower-case "x" is rejected as well. All records are examined (not only
    the first) so that an "X" in a later sequence is also caught.

    Raises:
        ValueError: On the first "X" found. The message reports its
            zero-based position; when more than one sequence is present it
            also names the one-based index of the offending sequence.
    """
    several = len(self.fasta_list) > 1
    for number, fasta in enumerate(self.fasta_list, start=1):
        # str.find returns the first match, which is the same position the
        # character-by-character scan would have reported.
        i = fasta.aa_seq.upper().find('X')
        if i == -1:
            continue
        suffix = f' (sequence {number})' if several else ''
        raise ValueError(
            'Error encountered validating FASTA:\n Unsupported AA code'
            f' "X" found at pos {i}{suffix}')
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
175 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
176 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
class FastaWriter:
    """Write FASTA records to stdout with wrapped, upper-cased sequences."""

    def __init__(self, line_wrap: int = 60) -> None:
        """Create a writer.

        Args:
            line_wrap: Maximum number of residues per output line.

        Raises:
            ValueError: If ``line_wrap`` is smaller than 1 (a zero or
                negative width cannot be used to slice the sequence).
        """
        if line_wrap < 1:
            raise ValueError(
                f'line_wrap must be a positive integer, got {line_wrap}')
        self.line_wrap = line_wrap

    def write(self, fasta: 'Fasta'):
        """Write one record to stdout: the header line, then the sequence.

        Args:
            fasta: Record exposing ``header`` and ``aa_seq`` string
                attributes.
        """
        header = fasta.header
        # Format before writing so a formatting failure emits nothing.
        seq = self.format_sequence(fasta.aa_seq)
        sys.stdout.write(header + '\n')
        sys.stdout.write(seq)

    def format_sequence(self, aa_seq: str) -> str:
        """Return ``aa_seq`` upper-cased and wrapped at ``self.line_wrap``.

        Every output line, including the last, ends with a newline. An
        empty sequence yields an empty string.
        """
        width = self.line_wrap
        wrapped = ''.join(
            aa_seq[start:start + width] + '\n'
            for start in range(0, len(aa_seq), width)
        )
        return wrapped.upper()
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
192 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
193 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def main():
    """Load, validate and re-emit the input FASTA.

    Parses the command line, loads the records with ``FastaLoader``,
    validates them with ``FastaValidator``, writes the validated records to
    stdout through ``FastaWriter`` and finally echoes them to stderr.

    Raises:
        ValueError: Re-raised after its message has been written to stderr.
        Exception: Any other failure is re-raised after a generic
            "input is invalid" message has been written to stderr.
    """
    try:
        # load fasta file
        args = parse_args()
        loader = FastaLoader(args.input)

        # validate
        validator = FastaValidator(
            min_length=args.min_length,
            max_length=args.max_length,
            multiple=args.multimer,
            max_sequence_count=args.max_sequence_count,
        )
        clean_fastas = validator.validate(loader.fastas)

        # write clean data
        writer = FastaWriter()
        for fasta in clean_fastas:
            writer.write(fasta)

        # Echo what was accepted to stderr.
        sys.stderr.write("Validated FASTA sequence(s):\n\n")
        for fasta in clean_fastas:
            sys.stderr.write(fasta.header + '\n')
            sys.stderr.write(fasta.aa_seq + '\n\n')

    except ValueError as exc:
        # Validation errors carry a user-facing message; show it verbatim.
        sys.stderr.write(f"{exc}\n\n")
        # Bare raise keeps the original traceback untouched.
        raise

    except Exception:
        sys.stderr.write(
            "Input error: FASTA input is invalid. Please check your input.\n\n"
        )
        raise
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
228 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
229 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
def parse_args(argv=None) -> argparse.Namespace:
    """Parse command-line arguments for the FASTA validator.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse
            reads ``sys.argv[1:]``.

    Returns:
        Namespace with ``input`` (str), ``min_length``, ``max_length`` and
        ``max_sequence_count`` (int, or ``None`` when not given) and
        ``multimer`` (bool).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input",
        help="input fasta file",
        type=str
    )
    parser.add_argument(
        "--min_length",
        dest='min_length',
        help="Minimum length of input protein sequence (AA)",
        default=None,
        type=int,
    )
    parser.add_argument(
        "--max_length",
        dest='max_length',
        help="Maximum length of input protein sequence (AA)",
        default=None,
        type=int,
    )
    # NOTE: this flag is hyphenated, unlike the underscore flags above;
    # kept as-is because callers depend on the exact spelling.
    parser.add_argument(
        "--max-sequences",
        dest='max_sequence_count',
        help="Maximum number of input sequences",
        default=None,
        type=int,
    )
    parser.add_argument(
        "--multimer",
        action='store_true',
        help="Require multiple input sequences",
    )
    return parser.parse_args(argv)
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
264 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
265 |
f9eb041c518c
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff
changeset
|
# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()