Mercurial > repos > galaxy-australia > alphafold2
comparison scripts/validate_fasta.py @ 18:e4a053d67e24 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit ef97511263dcac81f8563ae6a98d1db2400fcf1d
author | galaxy-australia |
---|---|
date | Fri, 01 Sep 2023 00:09:46 +0000 |
parents | f9eb041c518c |
children | 2f7702fd0a4c |
Comparison legend:
equal
deleted
inserted
replaced
17:5b85006245f3 | 18:e4a053d67e24 |
---|---|
4 import re | 4 import re |
5 import sys | 5 import sys |
6 from typing import List | 6 from typing import List |
7 | 7 |
8 MULTIMER_MAX_SEQUENCE_COUNT = 10 | 8 MULTIMER_MAX_SEQUENCE_COUNT = 10 |
9 STRIP_SEQUENCE_CHARS = ['\n', '\r', '\t', ' '] | |
9 | 10 |
10 | 11 |
11 class Fasta: | 12 class Fasta: |
12 def __init__(self, header_str: str, seq_str: str): | 13 def __init__(self, header_str: str, seq_str: str): |
13 self.header = header_str | 14 self.header = header_str |
64 if sequence: | 65 if sequence: |
65 # create generic header if not exists | 66 # create generic header if not exists |
66 if not header: | 67 if not header: |
67 fasta_count = len(self.fastas) | 68 fasta_count = len(self.fastas) |
68 header = f'>sequence_{fasta_count}' | 69 header = f'>sequence_{fasta_count}' |
70 | |
71 for char in STRIP_SEQUENCE_CHARS: | |
72 sequence = sequence.replace(char, '') | |
69 | 73 |
70 # Create new Fasta | 74 # Create new Fasta |
71 self.fastas.append(Fasta(header, sequence)) | 75 self.fastas.append(Fasta(header, sequence)) |
72 | 76 |
73 | 77 |
107 raise ValueError( | 111 raise ValueError( |
108 'Error encountered validating FASTA:\n' | 112 'Error encountered validating FASTA:\n' |
109 'Multimer mode requires multiple input sequence.' | 113 'Multimer mode requires multiple input sequence.' |
110 f' Only {fasta_count} sequences were detected in' | 114 f' Only {fasta_count} sequences were detected in' |
111 ' the provided file.') | 115 ' the provided file.') |
112 self.fasta_list = self.fasta_list | |
113 | 116 |
114 elif fasta_count > MULTIMER_MAX_SEQUENCE_COUNT: | 117 elif fasta_count > MULTIMER_MAX_SEQUENCE_COUNT: |
115 sys.stderr.write( | 118 sys.stderr.write( |
116 f'WARNING: detected {fasta_count} sequences but the' | 119 f'WARNING: detected {fasta_count} sequences but the' |
117 f' maximum allowed is {MULTIMER_MAX_SEQUENCE_COUNT}' | 120 f' maximum allowed is {MULTIMER_MAX_SEQUENCE_COUNT}' |