annotate scripts/validate_fasta.py @ 19:2f7702fd0a4c draft default tip

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
author galaxy-australia
date Wed, 08 May 2024 06:26:55 +0000
parents e4a053d67e24
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
1 """Validate input FASTA sequence."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
2
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
3 import argparse
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
4 import re
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
5 import sys
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
6 from typing import List
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
7
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
8 DEFAULT_MAX_SEQUENCE_COUNT = 10
18
e4a053d67e24 planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ef97511263dcac81f8563ae6a98d1db2400fcf1d
galaxy-australia
parents: 16
diff changeset
9 STRIP_SEQUENCE_CHARS = ['\n', '\r', '\t', ' ']
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
10
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
11
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
12 class Fasta:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
13 def __init__(self, header_str: str, seq_str: str):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
14 self.header = header_str
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
15 self.aa_seq = seq_str
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
17
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
18 class FastaLoader:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
19 def __init__(self, fasta_path: str):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
20 """Initialize from FASTA file."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
21 self.fastas = []
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
22 self.load(fasta_path)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
23
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
24 def load(self, fasta_path: str):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
25 """Load bare or FASTA formatted sequence."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
26 with open(fasta_path, 'r') as f:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
27 self.content = f.read()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
28
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
29 if "__cn__" in self.content:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
30 # Pasted content with escaped characters
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
31 self.newline = '__cn__'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
32 self.read_caret = '__gt__'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
33 else:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
34 # Uploaded file with normal content
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
35 self.newline = '\n'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
36 self.read_caret = '>'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
37
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
38 self.lines = self.content.split(self.newline)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
39
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
40 if not self.lines[0].startswith(self.read_caret):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
41 # Fasta is headless, load as single sequence
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
42 self.update_fastas(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
43 '', ''.join(self.lines)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
44 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
45
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
46 else:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
47 header = None
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
48 sequence = None
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
49 for line in self.lines:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
50 if line.startswith(self.read_caret):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
51 if header:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
52 self.update_fastas(header, sequence)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
53 header = '>' + self.strip_header(line)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
54 sequence = ''
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
55 else:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
56 sequence += line.strip('\n ')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
57 self.update_fastas(header, sequence)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
58
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
59 def strip_header(self, line):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
60 """Strip characters escaped with underscores from pasted text."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
61 return re.sub(r'\_\_.{2}\_\_', '', line).strip('>')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
62
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
63 def update_fastas(self, header: str, sequence: str):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
64 # if we have a sequence
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
65 if sequence:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
66 # create generic header if not exists
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
67 if not header:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
68 fasta_count = len(self.fastas)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
69 header = f'>sequence_{fasta_count}'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
70
18
e4a053d67e24 planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ef97511263dcac81f8563ae6a98d1db2400fcf1d
galaxy-australia
parents: 16
diff changeset
71 for char in STRIP_SEQUENCE_CHARS:
e4a053d67e24 planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ef97511263dcac81f8563ae6a98d1db2400fcf1d
galaxy-australia
parents: 16
diff changeset
72 sequence = sequence.replace(char, '')
e4a053d67e24 planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ef97511263dcac81f8563ae6a98d1db2400fcf1d
galaxy-australia
parents: 16
diff changeset
73
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
74 # Create new Fasta
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
75 self.fastas.append(Fasta(header, sequence))
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
76
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
77
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
78 class FastaValidator:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
79 def __init__(
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
80 self,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
81 min_length=None,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
82 max_length=None,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
83 multiple=False,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
84 max_sequence_count=None,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
85 ):
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
86 self.multiple = multiple
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
87 self.min_length = min_length
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
88 self.max_length = max_length
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
89 self.iupac_characters = {
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
90 'A', 'B', 'C', 'D', 'E', 'F', 'G',
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
91 'H', 'I', 'K', 'L', 'M', 'N', 'P',
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
92 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
93 'Y', 'Z', '-'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
94 }
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
95 self.max_sequence_count = (
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
96 max_sequence_count
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
97 or DEFAULT_MAX_SEQUENCE_COUNT)
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
98
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
99 def validate(self, fasta_list: List[Fasta]):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
100 """Perform FASTA validation."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
101 self.fasta_list = fasta_list
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
102 self.validate_num_seqs()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
103 self.validate_length()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
104 self.validate_alphabet()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
105 # not checking for 'X' nucleotides at the moment.
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
106 # alphafold can throw an error if it doesn't like it.
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
107 # self.validate_x()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
108 return self.fasta_list
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
109
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
110 def validate_num_seqs(self) -> None:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
111 """Assert that only one sequence has been provided."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
112 fasta_count = len(self.fasta_list)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
113
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
114 if self.multiple:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
115 if fasta_count < 2:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
116 raise ValueError(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
117 'Error encountered validating FASTA:\n'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
118 'Multimer mode requires multiple input sequence.'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
119 f' Only {fasta_count} sequences were detected in'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
120 ' the provided file.')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
121
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
122 elif fasta_count > self.max_sequence_count:
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
123 raise ValueError(
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
124 f'WARNING: detected {fasta_count} sequences but the'
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
125 f' maximum allowed is {self.max_sequence_count}'
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
126 ' sequences.')
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
127 else:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
128 if fasta_count > 1:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
129 sys.stderr.write(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
130 'WARNING: More than 1 sequence detected.'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
131 ' Using first FASTA sequence as input.\n')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
132 self.fasta_list = self.fasta_list[:1]
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
133
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
134 elif len(self.fasta_list) == 0:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
135 raise ValueError(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
136 'Error encountered validating FASTA:\n'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
137 ' no FASTA sequences detected in input file.')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
138
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
139 def validate_length(self):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
140 """Confirm whether sequence length is valid."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
141 fasta = self.fasta_list[0]
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
142 if self.min_length:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
143 if len(fasta.aa_seq) < self.min_length:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
144 raise ValueError(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
145 'Error encountered validating FASTA:\n Sequence too short'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
146 f' ({len(fasta.aa_seq)}AA).'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
147 f' Minimum length is {self.min_length}AA.')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
148 if self.max_length:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
149 if len(fasta.aa_seq) > self.max_length:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
150 raise ValueError(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
151 'Error encountered validating FASTA:\n'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
152 f' Sequence too long ({len(fasta.aa_seq)}AA).'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
153 f' Maximum length is {self.max_length}AA.')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
154
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
155 def validate_alphabet(self):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
156 """Confirm whether the sequence conforms to IUPAC codes.
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
157
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
158 If not, report the offending character and its position.
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
159 """
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
160 fasta = self.fasta_list[0]
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
161 for i, char in enumerate(fasta.aa_seq.upper()):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
162 if char not in self.iupac_characters:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
163 raise ValueError(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
164 'Error encountered validating FASTA:\n Invalid amino acid'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
165 f' found at pos {i}: "{char}"')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
166
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
167 def validate_x(self):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
168 """Check for X bases."""
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
169 fasta = self.fasta_list[0]
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
170 for i, char in enumerate(fasta.aa_seq.upper()):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
171 if char == 'X':
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
172 raise ValueError(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
173 'Error encountered validating FASTA:\n Unsupported AA code'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
174 f' "X" found at pos {i}')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
175
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
176
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
177 class FastaWriter:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
178 def __init__(self) -> None:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
179 self.line_wrap = 60
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
180
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
181 def write(self, fasta: Fasta):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
182 header = fasta.header
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
183 seq = self.format_sequence(fasta.aa_seq)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
184 sys.stdout.write(header + '\n')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
185 sys.stdout.write(seq)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
186
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
187 def format_sequence(self, aa_seq: str):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
188 formatted_seq = ''
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
189 for i in range(0, len(aa_seq), self.line_wrap):
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
190 formatted_seq += aa_seq[i: i + self.line_wrap] + '\n'
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
191 return formatted_seq.upper()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
192
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
193
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
194 def main():
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
195 # load fasta file
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
196 try:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
197 args = parse_args()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
198 fas = FastaLoader(args.input)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
199
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
200 # validate
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
201 fv = FastaValidator(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
202 min_length=args.min_length,
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
203 max_length=args.max_length,
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
204 multiple=args.multimer,
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
205 max_sequence_count=args.max_sequence_count,
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
206 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
207 clean_fastas = fv.validate(fas.fastas)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
208
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
209 # write clean data
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
210 fw = FastaWriter()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
211 for fas in clean_fastas:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
212 fw.write(fas)
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
213
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
214 sys.stderr.write("Validated FASTA sequence(s):\n\n")
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
215 for fas in clean_fastas:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
216 sys.stderr.write(fas.header + '\n')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
217 sys.stderr.write(fas.aa_seq + '\n\n')
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
218
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
219 except ValueError as exc:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
220 sys.stderr.write(f"{exc}\n\n")
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
221 raise exc
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
222
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
223 except Exception as exc:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
224 sys.stderr.write(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
225 "Input error: FASTA input is invalid. Please check your input.\n\n"
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
226 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
227 raise exc
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
228
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
229
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
230 def parse_args() -> argparse.Namespace:
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
231 parser = argparse.ArgumentParser()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
232 parser.add_argument(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
233 "input",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
234 help="input fasta file",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
235 type=str
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
236 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
237 parser.add_argument(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
238 "--min_length",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
239 dest='min_length',
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
240 help="Minimum length of input protein sequence (AA)",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
241 default=None,
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
242 type=int,
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
243 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
244 parser.add_argument(
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
245 "--max_length",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
246 dest='max_length',
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
247 help="Maximum length of input protein sequence (AA)",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
248 default=None,
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
249 type=int,
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
250 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
251 parser.add_argument(
19
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
252 "--max-sequences",
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
253 dest='max_sequence_count',
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
254 help="Maximum number of input sequences",
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
255 default=None,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
256 type=int,
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
257 )
2f7702fd0a4c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
galaxy-australia
parents: 18
diff changeset
258 parser.add_argument(
16
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
259 "--multimer",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
260 action='store_true',
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
261 help="Require multiple input sequences",
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
262 )
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
263 return parser.parse_args()
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
264
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
265
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
266 if __name__ == '__main__':
f9eb041c518c planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ee77734f1800350fa2a6ef28b2b8eade304a456f-dirty
galaxy-australia
parents:
diff changeset
267 main()