Mercurial > repos > galaxy-australia > alphafold2
Comparison of validate_fasta.py @ 8:ca90d17ff51b (draft)
"planemo upload for repository https://github.com/usegalaxy-au/tools-au commit 03537aada92b5fff565ff48dd47c81462c5df47e"
author | galaxy-australia |
---|---|
date | Fri, 19 Aug 2022 00:29:16 +0000 |
parents | eb085b3dbaf8 |
children | 3bd420ec162d |
Comparison legend: equal, deleted, inserted, replaced
7:eb085b3dbaf8 | 8:ca90d17ff51b |
---|---|
1 """Validate input FASTA sequence.""" | 1 """Validate input FASTA sequence.""" |
2 | 2 |
3 import re | 3 import re |
4 import sys | 4 import sys |
5 import argparse | 5 import argparse |
6 from typing import List, TextIO | 6 from typing import List |
7 | 7 |
8 | 8 |
9 class Fasta: | 9 class Fasta: |
10 def __init__(self, header_str: str, seq_str: str): | 10 def __init__(self, header_str: str, seq_str: str): |
11 self.header = header_str | 11 self.header = header_str |
96 # self.validate_x() | 96 # self.validate_x() |
97 | 97 |
98 def validate_num_seqs(self) -> None: | 98 def validate_num_seqs(self) -> None: |
99 """Assert that only one sequence has been provided.""" | 99 """Assert that only one sequence has been provided.""" |
100 if len(self.fasta_list) > 1: | 100 if len(self.fasta_list) > 1: |
101 raise ValueError( | 101 sys.stderr.write( |
102 'Error encountered validating FASTA:\n' | 102 'WARNING: More than 1 sequence detected.' |
103 f' More than 1 sequence detected ({len(self.fasta_list)}).' | 103 ' Using first FASTA sequence as input.\n') |
104 ' Please use single FASTA sequence as input.') | 104 self.fasta_list = self.fasta_list[:1] |
105 elif len(self.fasta_list) == 0: | 105 elif len(self.fasta_list) == 0: |
106 raise ValueError( | 106 raise ValueError( |
107 'Error encountered validating FASTA:\n' | 107 'Error encountered validating FASTA:\n' |
108 ' input file has no FASTA sequences') | 108 ' input file has no FASTA sequences') |
109 | 109 |
157 | 157 |
158 def format_sequence(self, aa_seq: str): | 158 def format_sequence(self, aa_seq: str): |
159 formatted_seq = '' | 159 formatted_seq = '' |
160 for i in range(0, len(aa_seq), self.line_wrap): | 160 for i in range(0, len(aa_seq), self.line_wrap): |
161 formatted_seq += aa_seq[i: i + self.line_wrap] + '\n' | 161 formatted_seq += aa_seq[i: i + self.line_wrap] + '\n' |
162 return formatted_seq | 162 return formatted_seq.upper() |
163 | 163 |
164 | 164 |
165 def main(): | 165 def main(): |
166 # load fasta file | 166 # load fasta file |
167 try: | 167 try: |