changeset 19:2f7702fd0a4c draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit cd0379c8ecc24977dffa462c1897d402c85fa4e6
author galaxy-australia
date Wed, 08 May 2024 06:26:55 +0000
parents e4a053d67e24
children 6ab1a261520a
files alphafold.xml scripts/validate_fasta.py
diffstat 2 files changed, 24 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/alphafold.xml	Fri Sep 01 00:09:46 2023 +0000
+++ b/alphafold.xml	Wed May 08 06:26:55 2024 +0000
@@ -3,7 +3,7 @@
     <macros>
       <token name="@TOOL_VERSION@">2.3.1</token>
       <token name="@TOOL_MINOR_VERSION@">2.3</token>
-      <token name="@VERSION_SUFFIX@">4</token>
+      <token name="@VERSION_SUFFIX@">5</token>
       <import>macro_output.xml</import>
       <import>macro_test_output.xml</import>
     </macros>
@@ -48,6 +48,7 @@
 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0}
 #if $model_preset == 'multimer':
 --multimer
+--max-sequences \${ALPHAFOLD_MAX_SEQUENCES:-10}
 #end if
 > alphafold.fasta
 
--- a/scripts/validate_fasta.py	Fri Sep 01 00:09:46 2023 +0000
+++ b/scripts/validate_fasta.py	Wed May 08 06:26:55 2024 +0000
@@ -5,7 +5,7 @@
 import sys
 from typing import List
 
-MULTIMER_MAX_SEQUENCE_COUNT = 10
+DEFAULT_MAX_SEQUENCE_COUNT = 10
 STRIP_SEQUENCE_CHARS = ['\n', '\r', '\t', ' ']
 
 
@@ -77,10 +77,12 @@
 
 class FastaValidator:
     def __init__(
-            self,
-            min_length=None,
-            max_length=None,
-            multiple=False):
+        self,
+        min_length=None,
+        max_length=None,
+        multiple=False,
+        max_sequence_count=None,
+    ):
         self.multiple = multiple
         self.min_length = min_length
         self.max_length = max_length
@@ -90,6 +92,9 @@
             'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
             'Y', 'Z', '-'
         }
+        self.max_sequence_count = (
+            max_sequence_count
+            or DEFAULT_MAX_SEQUENCE_COUNT)
 
     def validate(self, fasta_list: List[Fasta]):
         """Perform FASTA validation."""
@@ -114,14 +119,11 @@
                     f' Only {fasta_count} sequences were detected in'
                     ' the provided file.')
 
-            elif fasta_count > MULTIMER_MAX_SEQUENCE_COUNT:
-                sys.stderr.write(
+            elif fasta_count > self.max_sequence_count:
+                raise ValueError(
                     f'WARNING: detected {fasta_count} sequences but the'
-                    f' maximum allowed is {MULTIMER_MAX_SEQUENCE_COUNT}'
-                    ' sequences. The last'
-                    f' {fasta_count - MULTIMER_MAX_SEQUENCE_COUNT} sequence(s)'
-                    ' have been discarded.\n')
-                self.fasta_list = self.fasta_list[:MULTIMER_MAX_SEQUENCE_COUNT]
+                    f' maximum allowed is {self.max_sequence_count}'
+                    ' sequences.')
         else:
             if fasta_count > 1:
                 sys.stderr.write(
@@ -200,6 +202,7 @@
             min_length=args.min_length,
             max_length=args.max_length,
             multiple=args.multimer,
+            max_sequence_count=args.max_sequence_count,
         )
         clean_fastas = fv.validate(fas.fastas)
 
@@ -246,6 +249,13 @@
         type=int,
     )
     parser.add_argument(
+        "--max-sequences",
+        dest='max_sequence_count',
+        help="Maximum number of input sequences",
+        default=None,
+        type=int,
+    )
+    parser.add_argument(
         "--multimer",
         action='store_true',
         help="Require multiple input sequences",